1.9.30 sync.
[gae.git] / python / php / sdk / google / appengine / ext / cloud_storage_streams / CloudStorageClient.php
blob61ab3617cdc137286591f5c5f2f958cd2c5e8977
1 <?php
2 /**
3 * Copyright 2007 Google Inc.
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
9 * http://www.apache.org/licenses/LICENSE-2.0
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
17 /**
18 * CloudStorageClient is the base class for classes that are used to communicate
19 * with Google Cloud Storage via the PHP streams interface.
23 namespace google\appengine\ext\cloud_storage_streams;
25 use google\appengine\api\app_identity\AppIdentityService;
26 use google\appengine\api\app_identity\AppIdentityException;
27 use google\appengine\api\cloud_storage\CloudStorageTools;
28 use google\appengine\runtime\ApiProxy;
29 use google\appengine\runtime\ApplicationError;
30 use google\appengine\URLFetchRequest\RequestMethod;
31 use google\appengine\URLFetchServiceError\ErrorCode;
32 use google\appengine\util\ArrayUtil;
33 use google\appengine\util\StringUtil;
35 /**
36 * CloudStorageClient provides default fail implementations for all of the
37 * methods that the stream wrapper might potentially call. Derived classes then
38 * only implement the methods that are relevant to the operations that they
39 * perform.
41 abstract class CloudStorageClient {
42 /**
43 * Headers that may be controlled by the user through the stream context.
45 protected static $METADATA_HEADERS = [
46 'Cache-Control',
47 'Content-Disposition',
48 'Content-Encoding',
49 'Content-Language',
50 'Content-Type',
51 // x-goog-meta-* handled separately.
54 /**
55 * Prefix for all metadata headers used when parsing and rendering.
57 const METADATA_HEADER_PREFIX = 'x-goog-meta-';
59 // The default chunk size that we will read from the file. This value should
60 // remain smaller than the maximum object size valid for memcache writes so
61 // we can cache the reads.
62 const DEFAULT_READ_SIZE = 524288;
64 // The default amount of time that reads will be held in the cache.
65 const DEFAULT_READ_CACHE_EXPIRY_SECONDS = 3600; // one hour
67 // The default maximum number of times that certain (see retryable_statuses)
68 // failed Google Cloud Storage requests will be retried before returning
69 // failure.
70 const DEFAULT_MAXIMUM_NUMBER_OF_RETRIES = 2;
72 // The default time the writable state of a bucket will be cached for.
73 const DEFAULT_WRITABLE_CACHE_EXPIRY_SECONDS = 600; // ten minutes
75 // Token scopes for accessing objects in Google Cloud Storage
76 const READ_SCOPE = "https://www.googleapis.com/auth/devstorage.read_only";
77 const WRITE_SCOPE = "https://www.googleapis.com/auth/devstorage.read_write";
78 const FULL_SCOPE = "https://www.googleapis.com/auth/devstorage.full_control";
80 // Format for the OAuth token header.
81 const OAUTH_TOKEN_FORMAT = "OAuth %s";
83 // Content Range Header format when the total length is unknown.
84 const PARTIAL_CONTENT_RANGE_FORMAT = "bytes %d-%d/*";
86 // Content Range Header format when the length is known.
87 const FINAL_CONTENT_RANGE_FORMAT = "bytes %d-%d/%d";
89 // Content Range Header for final chunk with no new data
90 const FINAL_CONTENT_RANGE_NO_DATA = "bytes */%d";
92 // A character or multiple characters that can be used to simplify a list of
93 // objects that use a directory-like naming scheme. Can be used in conjunction
94 // with a prefix.
95 const DELIMITER = '/';
97 // Cloud storage can append _$folder$ to an object name and have it behave
98 // like a regular file system folder.
99 const FOLDER_SUFFIX = '_$folder$';
101 // Temporary file name we create when checking if a bucket is writable.
102 const WRITABLE_TEMP_FILENAME = "/_ah_is_writable_temp_file";
104 // Bit fields for the stat mode field
105 const S_IFREG = 0100000;
106 const S_IFDIR = 0040000;
108 const S_IRWXU = 00700; // mask for owner permissions
109 const S_IRUSR = 00400; // read for owner
110 const S_IWUSR = 00200; // write for owner
111 const S_IXUSR = 00100; // execute for owner
113 const S_IRWXG = 00070; // mask for group permissions
114 const S_IRGRP = 00040; // read for group
115 const S_IWGRP = 00020; // write for group
116 const S_IXGRP = 00010; // execute for group
118 const S_IRWXO = 00007; // mask for other other permissions
119 const S_IROTH = 00004; // read for other
120 const S_IWOTH = 00002; // write for other
121 const S_IXOTH = 00001; // execute for other
123 // The API version header
124 private static $api_version_header = ["x-goog-api-version" => 2];
126 // Regex pattern for retrieving the length of the content being served.
127 const CONTENT_RANGE_REGEX = "/bytes\s+(\d+)-(\d+)\/(\d+)/i";
130 * Memcache key format for caching the results of reads from GCS. The
131 * parameters are the object url (as a string) and the read range, as a
132 * string (e.g. bytes=0-512000).
133 * Example key for a cloud storage file gs://bucket/object.png
134 * _ah_gs_read_cache_https://storage.googleapis.com/bucket/object.png_bytes=0-524287
136 const MEMCACHE_KEY_FORMAT = "_ah_gs_read_cache_%s_%s";
139 * Memcache key format for caching the results of checking if a bucket is
140 * writable. The only way to check if an app can write to a bucket is by
141 * actually writing a file. As the ACL on a bucket is unlikely to change
142 * then we can cache the result.
144 const WRITABLE_MEMCACHE_KEY_FORMAT = "_ah_gs_write_bucket_cache_%s";
146 // HTTP status codes that should be retried if they are returned by a request
147 // to GCS. Retry should occur with a random exponential back-off.
148 protected static $retry_error_codes = [HttpResponse::REQUEST_TIMEOUT,
149 HttpResponse::INTERNAL_SERVER_ERROR,
150 HttpResponse::BAD_GATEWAY,
151 HttpResponse::SERVICE_UNAVAILABLE,
152 HttpResponse::GATEWAY_TIMEOUT];
154 protected static $retry_exception_codes = [
155 ErrorCode::DEADLINE_EXCEEDED,
156 ErrorCode::FETCH_ERROR,
157 ErrorCode::INTERNAL_TRANSIENT_ERROR];
159 // Values that are allowed to be supplied as ACLs when writing objects.
160 protected static $valid_acl_values = ["private",
161 "public-read",
162 "public-read-write",
163 "authenticated-read",
164 "bucket-owner-read",
165 "bucket-owner-full-control"];
167 protected static $upload_start_header = ["x-goog-resumable" => "start"];
169 // Map HTTP request types to URLFetch method enum.
170 private static $request_map = [
171 "GET" => RequestMethod::GET,
172 "POST" => RequestMethod::POST,
173 "HEAD" => RequestMethod::HEAD,
174 "PUT" => RequestMethod::PUT,
175 "DELETE" => RequestMethod::DELETE,
176 "PATCH" => RequestMethod::PATCH
179 private static $retryable_statuses = [
180 408, // Request Timeout
181 500, // Internal Server Error
182 502, // Bad Gateway
183 503, // Service Unavailable
184 504, // Gateway Timeout
187 private static $default_gs_context_options = [
188 "enable_cache" => true,
189 "enable_optimistic_cache" => false,
190 "max_retries" => self::DEFAULT_MAXIMUM_NUMBER_OF_RETRIES,
191 "read_cache_expiry_seconds" => self::DEFAULT_READ_CACHE_EXPIRY_SECONDS,
192 "writable_cache_expiry_seconds" =>
193 self::DEFAULT_WRITABLE_CACHE_EXPIRY_SECONDS,
196 protected $bucket_name; // Name of the bucket for this object.
197 protected $object_name; // The name of the object.
198 protected $context_options = []; // Any context arguments supplied on open.
199 protected $url; // GCS URL of the object.
200 protected $anonymous; // Use anonymous access when contacting GCS.
/**
 * Build a client bound to a bucket and, optionally, an object within it.
 *
 * Options found under the "gs" key of the stream context are layered over
 * the class defaults. When no context is supplied the default stream context
 * is consulted instead.
 *
 * @param string $bucket The name of the bucket.
 * @param string $object The name of the object, or null if there is no
 * object.
 * @param resource $context The stream context to use.
 */
public function __construct($bucket, $object = null, $context = null) {
  $this->bucket_name = $bucket;
  $this->object_name = $object;

  $stream_context = isset($context) ? $context : stream_context_get_default();
  $all_options = stream_context_get_options($stream_context);

  $gs_options = self::$default_gs_context_options;
  if (array_key_exists("gs", $all_options)) {
    // Values supplied by the caller take precedence over the defaults.
    $gs_options = array_merge($gs_options, $all_options["gs"]);
  }
  $this->context_options = $gs_options;

  $this->anonymous = ArrayUtil::findByKeyOrNull($this->context_options,
                                                "anonymous");

  $this->url = $this->createObjectUrl($bucket, $object);
}
/**
 * Default destructor; the base class has nothing to release.
 */
public function __destruct() {
}
/**
 * Default fail implementation. Derived classes that need an initialization
 * step override this method.
 *
 * @return bool Always false in the base class.
 */
public function initialize() {
  return false;
}
/**
 * Default fail implementation of the directory read operation. Derived
 * classes that support it override this method.
 *
 * @return bool Always false in the base class.
 */
public function dir_readdir() {
  return false;
}
/**
 * Default fail implementation of the directory rewind operation. Derived
 * classes that support it override this method.
 *
 * @return bool Always false in the base class.
 */
public function dir_rewinddir() {
  return false;
}
/**
 * Default close implementation; does nothing and returns no value.
 *
 * @return void
 */
public function close() {
}
/**
 * Default fail implementation of the delete operation. Derived classes that
 * support it override this method.
 *
 * @return bool Always false in the base class.
 */
public function delete() {
  return false;
}
/**
 * Default end-of-file check. Derived classes that can be read from override
 * this method.
 *
 * @return bool Always true in the base class.
 */
public function eof() {
  return true;
}
/**
 * Default flush implementation; there is nothing to flush in the base class.
 *
 * @return bool Always true in the base class.
 */
public function flush() {
  return true;
}
/**
 * Default fail implementation of the read operation. Derived classes that
 * support reading override this method.
 *
 * @param int $count_bytes Unused by the base class.
 *
 * @return bool Always false in the base class.
 */
public function read($count_bytes) {
  return false;
}
/**
 * Default fail implementation of the seek operation. Derived classes that
 * support seeking override this method.
 *
 * @param mixed $offset Unused by the base class.
 * @param mixed $whence Unused by the base class.
 *
 * @return bool Always false in the base class.
 */
public function seek($offset, $whence) {
  return false;
}
/**
 * Default fail implementation of the stat operation. Derived classes that
 * support it override this method.
 *
 * @return bool Always false in the base class.
 */
public function stat() {
  return false;
}
/**
 * Default fail implementation of the tell operation. Derived classes that
 * track a position override this method.
 *
 * @return bool Always false in the base class.
 */
public function tell() {
  return false;
}
/**
 * Default fail implementation of the write operation. Derived classes that
 * support writing override this method.
 *
 * @param mixed $data Unused by the base class.
 *
 * @return bool Always false in the base class.
 */
public function write($data) {
  return false;
}
/**
 * Default metadata accessor. Raises a PHP error naming the concrete class
 * and reports failure; subclasses that can supply the metadata of the
 * underlying GCS object override this method.
 *
 * @return bool Always false in the base class.
 */
public function getMetaData() {
  $error_text = sprintf("%s does not have metadata", get_class($this));
  trigger_error($error_text);
  return false;
}
/**
 * Default content type accessor. Raises a PHP error naming the concrete
 * class and reports failure; subclasses that can supply the MIME content
 * type of the underlying GCS object override this method.
 *
 * @return bool Always false in the base class.
 */
public function getContentType() {
  $error_text = sprintf("%s does not have content type", get_class($this));
  trigger_error($error_text);
  return false;
}
/**
 * Build the HTTP Authorization header for the supplied scopes.
 *
 * No header is produced for anonymous clients. Otherwise an access token is
 * requested from AppIdentityService and rendered with OAUTH_TOKEN_FORMAT.
 *
 * @param $scopes mixed The scopes to acquire the token for.
 *
 * @return mixed An empty array for anonymous access, an array holding the
 * "Authorization" header on success, or false if acquiring the token raised
 * an AppIdentityException.
 */
protected function getOAuthTokenHeader($scopes) {
  if ($this->anonymous) {
    return [];
  }

  try {
    $token = AppIdentityService::getAccessToken($scopes);
    $header_value = sprintf(self::OAUTH_TOKEN_FORMAT, $token['access_token']);
    return ["Authorization" => $header_value];
  } catch (AppIdentityException $e) {
    return false;
  }
}
/**
 * Create a URL for a target bucket and optional object.
 *
 * A missing object is treated as the empty string, and a single leading "/"
 * on the object name is dropped before the name is handed to
 * CloudStorageTools.
 *
 * @param string $bucket The name of the bucket.
 * @param string $object The name of the object, or null for the bucket
 * itself.
 *
 * @return mixed The value returned by CloudStorageTools::getPublicUrl().
 *
 * @visibleForTesting
 */
public static function createObjectUrl($bucket, $object = null) {
  $object_path = isset($object) ? $object : "";

  // Strip leading "/" for the object name.
  if (StringUtil::startsWith($object_path, "/")) {
    $object_path = substr($object_path, 1);
  }

  return CloudStorageTools::getPublicUrl(
      CloudStorageTools::getFilename($bucket, $object_path),
      true);
}
/**
 * Build an HTTP Range header covering an inclusive byte span.
 *
 * @param $start_byte int The offset of the first byte in the range.
 * @param $end_byte int The offset of the last byte in the range.
 *
 * @return array Single entry array mapping "Range" to "bytes=<start>-<end>".
 */
protected function getRangeHeader($start_byte, $end_byte) {
  assert($start_byte <= $end_byte);
  $range_value = sprintf("bytes=%d-%d", $start_byte, $end_byte);
  return ["Range" => $range_value];
}
/**
 * Make a request to GCS, adding the API version header to the supplied
 * headers before delegating to doHttpRequest().
 *
 * @param string $url The URL to request.
 * @param string $method The HTTP method name, e.g. "GET" or "PUT".
 * @param array $headers Associative array of request headers. The API
 * version header overrides any entry with the same key.
 * @param mixed $body Optional request payload.
 *
 * @return mixed False if the request failed, otherwise an array with the
 * keys 'status_code', 'headers' and 'body'.
 */
protected function makeHttpRequest($url, $method, $headers, $body = null) {
  $response = $this->doHttpRequest(
      $url,
      $method,
      array_merge($headers, self::$api_version_header),
      $body);

  if ($response === false) {
    return false;
  }

  $selected = [];
  foreach (['status_code', 'headers', 'body'] as $field) {
    $selected[$field] = $response[$field];
  }
  return $selected;
}
/**
 * Look up a header in an associative array, comparing header names without
 * regard to case. The first matching entry wins.
 *
 * @param $header_name string The name of the header to lookup.
 * @param $headers array Associative array of headers.
 *
 * @return mixed The value of the header if found, null otherwise.
 */
protected function getHeaderValue($header_name, $headers) {
  foreach ($headers as $name => $header_value) {
    if (strcasecmp($name, $header_name) !== 0) {
      continue;
    }
    return $header_value;
  }
  return null;
}
/**
 * Issue a single logical HTTP request through the URLFetch API, retrying
 * retryable failures with a random, exponentially growing delay.
 *
 * @param string $url The URL to request.
 * @param string $method The HTTP method name; must be a key of $request_map.
 * @param array $headers Associative array of request headers.
 * @param mixed $body Request payload, or null for none.
 *
 * @return mixed False if URLFetch raised a non retryable application error,
 * otherwise an array with the keys 'status_code', 'headers' and 'body'.
 */
private function doHttpRequest($url, $method, $headers, $body) {
  $req = new \google\appengine\URLFetchRequest();
  $req->setUrl($url);
  $req->setMethod(self::$request_map[$method]);
  $req->setMustValidateServerCertificate(true);
  if (isset($body)) {
    $req->setPayload($body);
  }

  foreach ($headers as $header_name => $header_value) {
    $request_header = $req->addHeader();
    $request_header->setKey($header_name);
    $request_header->setValue($header_value);
  }

  $resp = new \google\appengine\URLFetchResponse();

  $attempt = 0;
  while (true) {
    try {
      ApiProxy::makeSyncCall('urlfetch', 'Fetch', $req, $resp);
    } catch (ApplicationError $e) {
      if (!in_array($e->getApplicationError(), self::$retry_exception_codes)) {
        syslog(LOG_ERR,
               sprintf("Call to URLFetch failed with application error %d " .
                       "for url %s.",
                       $e->getApplicationError(),
                       $url));
        return false;
      }
      // Store a plausible status in the response in case the retry loop
      // falls through; this status also triggers a retry when one is still
      // available.
      $resp->setStatusCode(HttpResponse::GATEWAY_TIMEOUT);
    }

    $status_code = $resp->getStatusCode();

    // Retry only while attempts remain, the status is retryable and the
    // script has not timed out.
    $should_retry =
        $attempt < $this->context_options['max_retries'] &&
        in_array($status_code, self::$retryable_statuses) &&
        (connection_status() & CONNECTION_TIMEOUT) == 0;
    if (!$should_retry) {
      break;
    }

    // Random back-off whose upper bound doubles with every attempt.
    usleep(rand(0, 1000000 * pow(2, $attempt)));
    if ((connection_status() & CONNECTION_TIMEOUT) == CONNECTION_TIMEOUT) {
      // The script timed out while sleeping; give up with the last response.
      break;
    }
    $attempt++;
  }

  $response_headers = [];
  foreach ($resp->getHeaderList() as $response_header) {
    // TODO: Do we need to support multiple headers with the same key?
    $name = trim($response_header->getKey());
    $response_headers[$name] = trim($response_header->getValue());
  }

  return [
    'status_code' => $resp->getStatusCode(),
    'headers' => $response_headers,
    'body' => $resp->getContent(),
  ];
}
/**
 * Build a stat() style array holding every standard field twice: once under
 * its name and once under its numeric position.
 *
 * @param array $stat_args Values for any of the stat fields, keyed by field
 * name. Fields that are not present default to 0.
 *
 * @return array The combined associative and index based stat array.
 */
protected function createStatArray($stat_args) {
  $field_names = ["dev", "ino", "mode", "nlink", "uid", "gid", "rdev", "size",
                  "atime", "mtime", "ctime", "blksize", "blocks"];

  $stat = [];
  foreach ($field_names as $field) {
    $field_value = array_key_exists($field, $stat_args) ? $stat_args[$field]
                                                        : 0;
    // Named entry first, then the next positional entry.
    $stat[$field] = $field_value;
    $stat[] = $field_value;
  }

  return $stat;
}
/**
 * Collect the user metadata carried in a set of HTTP headers.
 *
 * A header qualifies when its lower cased name begins with
 * METADATA_HEADER_PREFIX (x-goog-meta-). The prefix is removed from the name
 * and the remainder, in its original case, becomes the metadata key.
 *
 * @param array $headers
 *   Associative array of HTTP headers.
 * @return array
 *   Array of parsed metadata headers.
 */
protected static function extractMetaData(array $headers) {
  $prefix_length = strlen(static::METADATA_HEADER_PREFIX);

  $metadata = [];
  foreach ($headers as $header_name => $header_value) {
    $is_metadata = StringUtil::startsWith(strtolower($header_name),
                                          static::METADATA_HEADER_PREFIX);
    if ($is_metadata) {
      $metadata[substr($header_name, $prefix_length)] = $header_value;
    }
  }

  return $metadata;
}
/**
 * Given an xml based error response from Cloud Storage, try and extract the
 * error code and error message according to the schema described at
 * https://developers.google.com/storage/docs/reference-status
 *
 * Both a Code and a Message element must be present in the document for the
 * extraction to count as successful; otherwise both reference variables are
 * left as null.
 *
 * @param string $gcs_result The response body of the last call to Google
 * Cloud Storage.
 * @param string $code Reference variable where the error code for the last
 * message will be returned.
 * @param string $message Reference variable where the error detail for the
 * last message will be returned.
 * @return bool True if the error code and message could be extracted, false
 * otherwise.
 */
protected function tryParseCloudStorageErrorMessage($gcs_result,
                                                    &$code,
                                                    &$message) {
  $code = null;
  $message = null;

  // An empty or non string body cannot hold an error document.
  if (!is_string($gcs_result) || $gcs_result === '') {
    return false;
  }

  // Collect parse errors internally so malformed bodies raise no warnings.
  $old_errors = libxml_use_internal_errors(true);
  $xml = simplexml_load_string($gcs_result);

  // Compare strictly against false: a SimpleXMLElement without children or
  // attributes evaluates to false in a loose comparison even though parsing
  // succeeded. Require both elements so that unrelated XML is not reported
  // as an error document with empty code and message.
  if ($xml !== false && isset($xml->Code) && isset($xml->Message)) {
    $code = (string) $xml->Code;
    $message = (string) $xml->Message;
  }

  // Discard errors buffered by this call, but only when buffering was
  // enabled here, so a caller that collects libxml errors keeps its own.
  if (!$old_errors) {
    libxml_clear_errors();
  }
  libxml_use_internal_errors($old_errors);

  return (isset($code) && isset($message));
}
/**
 * Build a human readable error message for an HTTP response.
 *
 * When the response body can be parsed as a Cloud Storage error document the
 * message and code from that document are used; otherwise the message is
 * derived from the HTTP status code.
 *
 * @param int $http_status_code The HTTP status code returned from the last
 * http request.
 * @param string $http_result The response body from the last http request.
 * @param string $msg_prefix The prefix to add to the error message that will
 * be generated.
 *
 * @return string The error message for the last HTTP response.
 */
protected function getErrorMessage($http_status_code,
                                   $http_result,
                                   $msg_prefix = "Cloud Storage Error:") {
  $parsed = $this->tryParseCloudStorageErrorMessage($http_result,
                                                    $code,
                                                    $message);
  if ($parsed) {
    return sprintf("%s %s (%s)", $msg_prefix, $message, $code);
  }

  $status_text = HttpResponse::getStatusMessage($http_status_code);
  return sprintf("%s %s", $msg_prefix, $status_text);
}