App Engine Python SDK version 1.8.9
[gae.git] / python / php / sdk / google / appengine / ext / cloud_storage_streams / CloudStorageClient.php
blob59b58fb296ee665d031174081ce697dd1883612d
1 <?php
2 /**
3 * Copyright 2007 Google Inc.
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
9 * http://www.apache.org/licenses/LICENSE-2.0
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
17 /**
18 * CloudStorageClient is the base class for classes that are used to communicate
19 * with Google Cloud Storage via the PHP streams interface.
23 namespace google\appengine\ext\cloud_storage_streams;
25 use google\appengine\api\app_identity\AppIdentityService;
26 use google\appengine\api\app_identity\AppIdentityException;
27 use google\appengine\api\cloud_storage\CloudStorageTools;
28 use google\appengine\runtime\ApiProxy;
29 use google\appengine\runtime\ApplicationError;
30 use google\appengine\URLFetchRequest\RequestMethod;
31 use google\appengine\util\ArrayUtil;
33 /**
34 * CloudStorageClient provides default fail implementations for all of the
35 * methods that the stream wrapper might potentially call. Derived classes then
36 * only implement the methods that are relevant to the operations that they
37 * perform.
39 abstract class CloudStorageClient {
40 // The default chunk size that we will read from the file. This value should
41 // remain smaller than the maximum object size valid for memcache writes so
42 // we can cache the reads.
43 const DEFAULT_READ_SIZE = 524288;
45 // The default amount of time that reads will be held in the cache.
46 const DEFAULT_READ_CACHE_EXPIRY_SECONDS = 3600; // one hour
48 // The default maximum number of times that certain (see retryable_statuses)
49 // failed Google Cloud Storage requests will be retried before returning
50 // failure.
51 const DEFAULT_MAXIMUM_NUMBER_OF_RETRIES = 2;
53 // The default time the writable state of a bucket will be cached for.
54 const DEFAULT_WRITABLE_CACHE_EXPIRY_SECONDS = 600; // ten minutes
56 // Token scopes for accessing objects in Google Cloud Storage
57 const READ_SCOPE = "https://www.googleapis.com/auth/devstorage.read_only";
58 const WRITE_SCOPE = "https://www.googleapis.com/auth/devstorage.read_write";
59 const FULL_SCOPE = "https://www.googleapis.com/auth/devstorage.full_control";
61 // Format for the OAuth token header.
62 const OAUTH_TOKEN_FORMAT = "OAuth %s";
64 // Content Range Header format when the total length is unknown.
65 const PARTIAL_CONTENT_RANGE_FORMAT = "bytes %d-%d/*";
67 // Content Range Header format when the length is known.
68 const FINAL_CONTENT_RANGE_FORMAT = "bytes %d-%d/%d";
70 // Content Range Header for final chunk with no new data
71 const FINAL_CONTENT_RANGE_NO_DATA = "bytes */%d";
73 // A character or multiple characters that can be used to simplify a list of
74 // objects that use a directory-like naming scheme. Can be used in conjunction
75 // with a prefix.
76 const DELIMITER = '/';
78 // Cloud storage can append _$folder$ to an object name and have it behave
79 // like a regular file system folder.
80 const FOLDER_SUFFIX = '_$folder$';
82 // Temporary file name we create when checking if a bucket is writable.
83 const WRITABLE_TEMP_FILENAME = "/_ah_is_writable_temp_file";
85 // Bit fields for the stat mode field
86 const S_IFREG = 0100000;
87 const S_IFDIR = 0040000;
89 const S_IRWXU = 00700; // mask for owner permissions
90 const S_IRUSR = 00400; // read for owner
91 const S_IWUSR = 00200; // write for owner
92 const S_IXUSR = 00100; // execute for owner
94 const S_IRWXG = 00070; // mask for group permissions
95 const S_IRGRP = 00040; // read for group
96 const S_IWGRP = 00020; // write for group
97 const S_IXGRP = 00010; // execute for group
99 const S_IRWXO = 00007; // mask for other other permissions
100 const S_IROTH = 00004; // read for other
101 const S_IWOTH = 00002; // write for other
102 const S_IXOTH = 00001; // execute for other
104 // The API version header
105 private static $api_version_header = ["x-goog-api-version" => 2];
107 // Regex pattern for retrieving the length of the content being served.
108 const CONTENT_RANGE_REGEX = "/bytes\s+(\d+)-(\d+)\/(\d+)/i";
111 * Memcache key format for caching the results of reads from GCS. The
112 * parameters are the object url (as a string) and the read range, as a
113 * string (e.g. bytes=0-512000).
114 * Example key for a cloud storage file gs://bucket/object.png
115 * _ah_gs_read_cache_https://storage.googleapis.com/bucket/object.png_bytes=0-524287
117 const MEMCACHE_KEY_FORMAT = "_ah_gs_read_cache_%s_%s";
120 * Memcache key format for caching the results of checking if a bucket is
121 * writable. The only way to check if an app can write to a bucket is by
122 * actually writing a file. As the ACL on a bucket is unlikely to change
123 * then we can cache the result.
125 const WRITABLE_MEMCACHE_KEY_FORMAT = "_ah_gs_write_bucket_cache_%s";
127 // HTTP status codes that should be retried if they are returned by a request
128 // to GCS. Retry should occur with a random exponential back-off.
129 protected static $retry_error_codes = [HttpResponse::REQUEST_TIMEOUT,
130 HttpResponse::INTERNAL_SERVER_ERROR,
131 HttpResponse::BAD_GATEWAY,
132 HttpResponse::SERVICE_UNAVAILABLE,
133 HttpResponse::GATEWAY_TIMEOUT];
135 // Values that are allowed to be supplied as ACLs when writing objects.
136 protected static $valid_acl_values = ["private",
137 "public-read",
138 "public-read-write",
139 "authenticated-read",
140 "bucket-owner-read",
141 "bucket-owner-full-control"];
143 protected static $upload_start_header = ["x-goog-resumable" => "start"];
145 // Map HTTP request types to URLFetch method enum.
146 private static $request_map = [
147 "GET" => RequestMethod::GET,
148 "POST" => RequestMethod::POST,
149 "HEAD" => RequestMethod::HEAD,
150 "PUT" => RequestMethod::PUT,
151 "DELETE" => RequestMethod::DELETE,
152 "PATCH" => RequestMethod::PATCH
155 private static $retryable_statuses = [
156 408, // Request Timeout
157 500, // Internal Server Error
158 502, // Bad Gateway
159 503, // Service Unavailable
160 504, // Gateway Timeout
163 private static $default_gs_context_options = [
164 "enable_cache" => true,
165 "enable_optimistic_cache" => false,
166 "max_retries" => self::DEFAULT_MAXIMUM_NUMBER_OF_RETRIES,
167 "read_cache_expiry_seconds" => self::DEFAULT_READ_CACHE_EXPIRY_SECONDS,
168 "writable_cache_expiry_seconds" =>
169 self::DEFAULT_WRITABLE_CACHE_EXPIRY_SECONDS,
172 protected $bucket_name; // Name of the bucket for this object.
173 protected $object_name; // The name of the object.
174 protected $context_options = []; // Any context arguments supplied on open.
175 protected $url; // GCS URL of the object.
176 protected $anonymous; // Use anonymous access when contacting GCS.
/**
 * Construct an object of CloudStorageClient.
 *
 * Records the bucket and object names, resolves the "gs" stream context
 * options on top of the class defaults, reads the "anonymous" option and
 * pre-computes the URL of the target object.
 *
 * @param string $bucket The name of the bucket.
 * @param string $object The name of the object, or null if there is no
 * object.
 * @param resource $context The stream context to use. When not supplied the
 * default stream context is used.
 */
public function __construct($bucket, $object = null, $context = null) {
  $this->bucket_name = $bucket;
  $this->object_name = $object;

  $stream_context = isset($context) ? $context
                                    : stream_context_get_default();
  $all_options = stream_context_get_options($stream_context);

  // Start from the defaults; any caller supplied "gs" options win key by key.
  $gs_options = self::$default_gs_context_options;
  if (array_key_exists("gs", $all_options)) {
    $gs_options = array_merge($gs_options, $all_options["gs"]);
  }
  $this->context_options = $gs_options;

  $this->anonymous = ArrayUtil::findByKeyOrNull($this->context_options,
                                                "anonymous");
  $this->url = $this->createObjectUrl($bucket, $object);
}
/**
 * Default destructor. Intentionally does nothing.
 */
public function __destruct() {
}
/**
 * Default fail implementation of initialize().
 *
 * @return bool Always false in this base class.
 */
public function initialize() {
  return false;
}
/**
 * Default fail implementation of dir_readdir().
 *
 * @return bool Always false in this base class.
 */
public function dir_readdir() {
  return false;
}
/**
 * Default fail implementation of dir_rewinddir().
 *
 * @return bool Always false in this base class.
 */
public function dir_rewinddir() {
  return false;
}
/**
 * Default implementation of close(). Does nothing and returns nothing.
 */
public function close() {
}
/**
 * Default fail implementation of delete().
 *
 * @return bool Always false in this base class.
 */
public function delete() {
  return false;
}
/**
 * Default implementation of eof().
 *
 * @return bool Always true in this base class.
 */
public function eof() {
  return true;
}
/**
 * Default implementation of flush().
 *
 * @return bool Always true in this base class.
 */
public function flush() {
  return true;
}
/**
 * Default fail implementation of read().
 *
 * @param int $count_bytes The number of bytes requested (unused here).
 *
 * @return bool Always false in this base class.
 */
public function read($count_bytes) {
  return false;
}
/**
 * Default fail implementation of seek().
 *
 * @param mixed $offset The requested offset (unused here).
 * @param mixed $whence The seek mode (unused here).
 *
 * @return bool Always false in this base class.
 */
public function seek($offset, $whence) {
  return false;
}
/**
 * Default fail implementation of stat().
 *
 * @return bool Always false in this base class.
 */
public function stat() {
  return false;
}
/**
 * Default fail implementation of tell().
 *
 * @return bool Always false in this base class.
 */
public function tell() {
  return false;
}
/**
 * Default fail implementation of write().
 *
 * @param mixed $data The data to be written (unused here).
 *
 * @return bool Always false in this base class.
 */
public function write($data) {
  return false;
}
/**
 * Get the OAuth Token HTTP header for the supplied scope.
 *
 * @param $scopes mixed The scopes to acquire the token for.
 *
 * @return array|bool An empty array when anonymous access is in use,
 * otherwise the HTTP Authorization header built from the access token that
 * AppIdentityService returns for the scopes. False if acquiring the token
 * raised an AppIdentityException.
 */
protected function getOAuthTokenHeader($scopes) {
  // Anonymous requests carry no Authorization header at all.
  if ($this->anonymous) {
    return [];
  }

  try {
    $token = AppIdentityService::getAccessToken($scopes);
  } catch (AppIdentityException $e) {
    return false;
  }

  $header_value = sprintf(self::OAUTH_TOKEN_FORMAT, $token['access_token']);
  return ["Authorization" => $header_value];
}
/**
 * Create a URL for a target bucket and optional object.
 *
 * The object name is expected to start with "/"; that leading slash is
 * removed before the name is handed to CloudStorageTools. When no object is
 * supplied (null or an empty string) the URL is built for the bucket alone.
 *
 * @param string $bucket The name of the bucket.
 * @param string $object The name of the object including its leading "/",
 * or null if there is no object.
 *
 * @return mixed The value returned by CloudStorageTools::getPublicUrl() for
 * the resulting Cloud Storage filename.
 *
 * @visibleForTesting
 */
public static function createObjectUrl($bucket, $object = null) {
  $object_name = "";

  // The explicit empty string check avoids an "Uninitialized string offset"
  // notice when $object is "".
  // NOTE(review): an object name that does not start with "/" is still
  // treated as "no object", exactly as before - confirm that is intended.
  if (isset($object) && $object !== "" && $object[0] == "/") {
    // substr() returns false rather than "" on older PHP versions when only
    // the "/" is present, so normalise the result to a string.
    $object_name = (string) substr($object, 1);
  }

  $gs_filename = CloudStorageTools::getFilename($bucket, $object_name);
  return CloudStorageTools::getPublicUrl($gs_filename, true);
}
/**
 * Return a Range HTTP header.
 *
 * @param $start_byte int The offset of the first byte in the range.
 * @param $end_byte int The offset of the last byte in the range. Must not be
 * smaller than $start_byte (checked with assert()).
 *
 * @return array The HTTP Range header for the supplied offsets, in the form
 * ["Range" => "bytes=<start>-<end>"].
 */
protected function getRangeHeader($start_byte, $end_byte) {
  assert($start_byte <= $end_byte);

  $byte_range = sprintf("bytes=%d-%d", $start_byte, $end_byte);
  return ["Range" => $byte_range];
}
/**
 * Make a request to GCS.
 *
 * The API version header is merged into the supplied headers (replacing any
 * caller supplied value for the same key) and the request is delegated to
 * doHttpRequest().
 *
 * @param string $url The URL to send the request to.
 * @param string $method The HTTP method name, e.g. "GET".
 * @param array $headers Associative array of request headers.
 * @param string $body Optional request payload.
 *
 * @return array|bool False if the request failed, otherwise an array with
 * the keys:
 *   'status_code' - the HTTP status code of the response,
 *   'headers'     - the response headers array,
 *   'body'        - the response body.
 */
protected function makeHttpRequest($url, $method, $headers, $body = null) {
  $response = $this->doHttpRequest(
      $url,
      $method,
      array_merge($headers, self::$api_version_header),
      $body);

  if ($response === false) {
    return false;
  }

  $result = [];
  foreach (['status_code', 'headers', 'body'] as $field) {
    $result[$field] = $response[$field];
  }
  return $result;
}
/**
 * Return the value of a header stored in an associative array, using a case
 * insensitive comparison on the header name.
 *
 * @param $header_name string The name of the header to lookup.
 * @param $headers array Associative array of headers.
 *
 * @return mixed The value of the first header whose name matches, or null if
 * no header matches.
 */
protected function getHeaderValue($header_name, $headers) {
  foreach ($headers as $name => $header_value) {
    if (strcasecmp($name, $header_name) !== 0) {
      continue;
    }
    // First match wins; later entries with the same name are not examined.
    return $header_value;
  }
  return null;
}
/**
 * Send an HTTP request through the URLFetch API, retrying retryable failures.
 *
 * A response whose status code is listed in self::$retryable_statuses is
 * retried, after a random sleep of up to 2^attempt seconds, until the
 * "max_retries" context option is exhausted or the connection status reports
 * a timeout. Server certificate validation is always requested.
 *
 * @param string $url The URL to fetch.
 * @param string $method The HTTP method name; must be a key of
 * self::$request_map.
 * @param array $headers Associative array of request headers.
 * @param string $body The request payload, or null for none.
 *
 * @return array|bool False if the URLFetch call raised an ApplicationError,
 * otherwise an array with the keys 'status_code', 'headers' and 'body' taken
 * from the last response received.
 */
private function doHttpRequest($url, $method, $headers, $body) {
  $request = new \google\appengine\URLFetchRequest();
  $request->setUrl($url);
  $request->setMethod(self::$request_map[$method]);
  $request->setMustValidateServerCertificate(true);
  if (isset($body)) {
    $request->setPayload($body);
  }

  foreach ($headers as $name => $header_value) {
    $request_header = $request->addHeader();
    $request_header->setKey($name);
    $request_header->setValue($header_value);
  }

  $response = new \google\appengine\URLFetchResponse();

  $attempt = 0;
  while (true) {
    try {
      ApiProxy::makeSyncCall('urlfetch', 'Fetch', $request, $response);
    } catch (ApplicationError $e) {
      syslog(LOG_ERR,
             sprintf("Call to URLFetch failed with application error %d.",
                     $e->getApplicationError()));
      return false;
    }
    $status = $response->getStatusCode();

    $should_retry =
        $attempt < $this->context_options['max_retries'] &&
        in_array($status, self::$retryable_statuses) &&
        (connection_status() & CONNECTION_TIMEOUT) == 0;
    if (!$should_retry) {
      break;
    }

    // Random back-off whose upper bound doubles with every attempt.
    usleep(rand(0, 1000000 * pow(2, $attempt)));

    // Give up if the script timed out while we were sleeping.
    if ((connection_status() & CONNECTION_TIMEOUT) == CONNECTION_TIMEOUT) {
      break;
    }
    $attempt++;
  }

  $response_headers = [];
  foreach ($response->getHeaderList() as $response_header) {
    // TODO: Do we need to support multiple headers with the same key?
    $name = trim($response_header->getKey());
    $response_headers[$name] = trim($response_header->getValue());
  }

  return [
    'status_code' => $response->getStatusCode(),
    'headers' => $response_headers,
    'body' => $response->getContent(),
  ];
}
/**
 * Generate the default stat() array, which is both associative and index
 * based.
 *
 * Every stat field appears twice in the result: once under its name and once
 * under the next free integer index. Fields missing from $stat_args are
 * reported as 0.
 *
 * @param array $stat_args Associative array of the stat fields to set.
 *
 * @return array The combined associative and indexed stat array.
 */
protected function createStatArray($stat_args) {
  $fields = ["dev", "ino", "mode", "nlink", "uid", "gid", "rdev", "size",
             "atime", "mtime", "ctime", "blksize", "blocks"];

  $stat = [];
  foreach ($fields as $field) {
    $field_value = array_key_exists($field, $stat_args) ? $stat_args[$field]
                                                        : 0;
    // Named entry first, then the positional entry for the same field.
    $stat[$field] = $field_value;
    $stat[] = $field_value;
  }
  return $stat;
}
/**
 * Given an xml based error response from Cloud Storage, try and extract the
 * error code and error message according to the schema described at
 * https://developers.google.com/storage/docs/reference-status
 *
 * The response is only treated as a Cloud Storage error when it is well
 * formed XML whose root element contains both a Code and a Message element.
 * In every other case $code and $message are left as null.
 *
 * @param string $gcs_result The response body of the last call to Google
 * Cloud Storage.
 * @param string $code Reference variable where the error code for the last
 * message will be returned.
 * @param string $message Reference variable where the error detail for the
 * last message will be returned.
 * @return bool True if the error code and message could be extracted, false
 * otherwise.
 */
protected function tryParseCloudStorageErrorMessage($gcs_result,
                                                    &$code,
                                                    &$message) {
  $code = null;
  $message = null;

  // An absent or empty body can never hold an error document.
  if ($gcs_result === null || $gcs_result === '') {
    return false;
  }

  // Collect libxml parse errors internally rather than emitting warnings.
  $old_errors = libxml_use_internal_errors(true);
  $xml = simplexml_load_string($gcs_result);

  // Require both elements to be present: casting a missing element to
  // string yields "", which would otherwise be reported as a successful
  // parse of an unrelated XML document.
  if ($xml !== false && isset($xml->Code) && isset($xml->Message)) {
    $code = (string) $xml->Code;
    $message = (string) $xml->Message;
  }

  // Drop the errors we caused to be buffered, but leave the buffer alone if
  // the caller was already collecting errors before we were invoked.
  if (!$old_errors) {
    libxml_clear_errors();
  }
  libxml_use_internal_errors($old_errors);

  return (isset($code) && isset($message));
}
/**
 * Return a formatted error message for the http response.
 *
 * When the response body can be parsed as a Cloud Storage error the message
 * has the form "<prefix> <message> (<code>)"; otherwise it falls back to
 * "<prefix> <status message>" for the HTTP status code.
 *
 * @param int $http_status_code The HTTP status code returned from the last
 * http request.
 * @param string $http_result The response body from the last http request.
 * @param string $msg_prefix The prefix to add to the error message that will
 * be generated.
 *
 * @return string The error message for the last HTTP response.
 */
protected function getErrorMessage($http_status_code,
                                   $http_result,
                                   $msg_prefix = "Cloud Storage Error:") {
  $parsed = $this->tryParseCloudStorageErrorMessage($http_result,
                                                    $code,
                                                    $message);
  if (!$parsed) {
    $status_message = HttpResponse::getStatusMessage($http_status_code);
    return sprintf("%s %s", $msg_prefix, $status_message);
  }

  return sprintf("%s %s (%s)", $msg_prefix, $message, $code);
}