3 * Copyright 2007 Google Inc.
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
9 * http://www.apache.org/licenses/LICENSE-2.0
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
18 * CloudStorageClient is the base class for classes that are used to communicate
19 * with Google Cloud Storage via the PHP streams interface.
23 namespace google\appengine\ext\cloud_storage_streams
;
25 use google\appengine\api\app_identity\AppIdentityService
;
26 use google\appengine\api\app_identity\AppIdentityException
;
27 use google\appengine\api\cloud_storage\CloudStorageTools
;
28 use google\appengine\runtime\ApiProxy
;
29 use google\appengine\runtime\ApplicationError
;
30 use google\appengine\URLFetchRequest\RequestMethod
;
31 use google\appengine\URLFetchServiceError\ErrorCode
;
32 use google\appengine\util\ArrayUtil
;
33 use google\appengine\util\StringUtil
;
36 * CloudStorageClient provides default fail implementations for all of the
37 * methods that the stream wrapper might potentially call. Derived classes then
38 * only implement the methods that are relevant to the operations that they
41 abstract class CloudStorageClient
{
43 * Headers that may be controlled by the user through the stream context.
45 protected static $METADATA_HEADERS = [
47 'Content-Disposition',
51 // x-goog-meta-* handled separately.
55 * Prefix for all metadata headers used when parsing and rendering.
57 const METADATA_HEADER_PREFIX
= 'x-goog-meta-';
59 // The default chunk size that we will read from the file. This value should
60 // remain smaller than the maximum object size valid for memcache writes so
61 // we can cache the reads.
// 524288 = 512 * 1024 bytes, i.e. a default chunk of 512 KiB.
62 const DEFAULT_READ_SIZE
= 524288;
64 // The default amount of time that reads will be held in the cache.
65 const DEFAULT_READ_CACHE_EXPIRY_SECONDS
= 3600; // one hour
67 // The default maximum number of times that certain (see retryable_statuses)
68 // failed Google Cloud Storage requests will be retried before returning
70 const DEFAULT_MAXIMUM_NUMBER_OF_RETRIES
= 2;
72 // The default time the writable state of a bucket will be cached for.
73 const DEFAULT_WRITABLE_CACHE_EXPIRY_SECONDS
= 600; // ten minutes
75 // Token scopes for accessing objects in Google Cloud Storage.
// As the URL suffixes indicate, these are the read-only, read/write and
// full-control variants of the devstorage OAuth scope.
76 const READ_SCOPE
= "https://www.googleapis.com/auth/devstorage.read_only";
77 const WRITE_SCOPE
= "https://www.googleapis.com/auth/devstorage.read_write";
78 const FULL_SCOPE
= "https://www.googleapis.com/auth/devstorage.full_control";
80 // Format for the OAuth token header.
81 const OAUTH_TOKEN_FORMAT
= "OAuth %s";
83 // Content Range Header format when the total length is unknown.
84 const PARTIAL_CONTENT_RANGE_FORMAT
= "bytes %d-%d/*";
86 // Content Range Header format when the length is known.
87 const FINAL_CONTENT_RANGE_FORMAT
= "bytes %d-%d/%d";
89 // Content Range Header for final chunk with no new data
90 const FINAL_CONTENT_RANGE_NO_DATA
= "bytes */%d";
92 // A character or multiple characters that can be used to simplify a list of
93 // objects that use a directory-like naming scheme. Can be used in conjunction
95 const DELIMITER
= '/';
97 // Cloud storage can append _$folder$ to an object name and have it behave
98 // like a regular file system folder.
99 const FOLDER_SUFFIX
= '_$folder$';
101 // Temporary file name we create when checking if a bucket is writable.
102 const WRITABLE_TEMP_FILENAME
= "/_ah_is_writable_temp_file";
104 // Bit fields for the stat mode field.
// Every value below is an octal literal (leading 0). S_IFREG and S_IFDIR
// carry the same names and values as the POSIX file-type bits for a regular
// file and a directory respectively.
105 const S_IFREG
= 0100000;
106 const S_IFDIR
= 0040000;
// Owner (user) permission bits.
108 const S_IRWXU
= 00700; // mask for owner permissions
109 const S_IRUSR
= 00400; // read for owner
110 const S_IWUSR
= 00200; // write for owner
111 const S_IXUSR
= 00100; // execute for owner
// Group permission bits.
113 const S_IRWXG
= 00070; // mask for group permissions
114 const S_IRGRP
= 00040; // read for group
115 const S_IWGRP
= 00020; // write for group
116 const S_IXGRP
= 00010; // execute for group
// Permission bits for everyone else ("other").
118 const S_IRWXO
= 00007; // mask for other permissions
119 const S_IROTH
= 00004; // read for other
120 const S_IWOTH
= 00002; // write for other
121 const S_IXOTH
= 00001; // execute for other
123 // The API version header.
// makeHttpRequest() array_merge()s this over the caller's headers, so a
// caller-supplied "x-goog-api-version" entry is replaced by this value.
124 private static $api_version_header = ["x-goog-api-version" => 2];
126 // Regex pattern for retrieving the length of the content being served.
// Case-insensitive match of text such as "bytes 0-99/1234". The three capture
// groups are the digits before the dash, after the dash and after the slash
// (first byte, last byte, total length). A "*" total does not match because
// the third group requires digits.
127 const CONTENT_RANGE_REGEX
= "/bytes\s+(\d+)-(\d+)\/(\d+)/i";
130 * Memcache key format for caching the results of reads from GCS. The
131 * parameters are the object url (as a string) and the read range, as a
132 * string (e.g. bytes=0-512000).
133 * Example key for a cloud storage file gs://bucket/object.png
134 * _ah_gs_read_cache_https://storage.googleapis.com/bucket/object.png_bytes=0-524287
136 const MEMCACHE_KEY_FORMAT
= "_ah_gs_read_cache_%s_%s";
139 * Memcache key format for caching the results of checking if a bucket is
140 * writable. The only way to check if an app can write to a bucket is by
141 * actually writing a file. As the ACL on a bucket is unlikely to change
142 * then we can cache the result.
144 const WRITABLE_MEMCACHE_KEY_FORMAT
= "_ah_gs_write_bucket_cache_%s";
146 // HTTP status codes that should be retried if they are returned by a request
147 // to GCS. Retry should occur with a random exponential back-off.
// NOTE(review): the retry check in doHttpRequest() tests the status code
// against self::$retryable_statuses rather than this list - confirm where
// (if anywhere, e.g. in subclasses) this property is still consulted.
148 protected static $retry_error_codes = [HttpResponse
::REQUEST_TIMEOUT
,
149 HttpResponse
::INTERNAL_SERVER_ERROR
,
150 HttpResponse
::BAD_GATEWAY
,
151 HttpResponse
::SERVICE_UNAVAILABLE
,
152 HttpResponse
::GATEWAY_TIMEOUT
];
// URLFetch application error codes that doHttpRequest() treats as transient.
// When the fetch call throws an ApplicationError carrying one of these codes,
// the response status is set to HttpResponse::GATEWAY_TIMEOUT so that the
// normal status-based retry check can run.
154 protected static $retry_exception_codes = [
155 ErrorCode
::DEADLINE_EXCEEDED
,
156 ErrorCode
::FETCH_ERROR
,
157 ErrorCode
::INTERNAL_TRANSIENT_ERROR
];
159 // Values that are allowed to be supplied as ACLs when writing objects.
160 protected static $valid_acl_values = ["private",
163 "authenticated-read",
165 "bucket-owner-full-control"];
167 protected static $upload_start_header = ["x-goog-resumable" => "start"];
169 // Map HTTP request types to URLFetch method enum.
// Keys are the upper-case HTTP verb strings. doHttpRequest() indexes this map
// with its $method argument and passes the result to setMethod().
170 private static $request_map = [
171 "GET" => RequestMethod
::GET
,
172 "POST" => RequestMethod
::POST
,
173 "HEAD" => RequestMethod
::HEAD
,
174 "PUT" => RequestMethod
::PUT
,
175 "DELETE" => RequestMethod
::DELETE
,
176 "PATCH" => RequestMethod
::PATCH
// HTTP status codes for which doHttpRequest() will retry a request, provided
// the "max_retries" context option has not been used up and the connection
// has not timed out.
179 private static $retryable_statuses = [
180 408, // Request Timeout
181 500, // Internal Server Error
183 503, // Service Unavailable
184 504, // Gateway Timeout
// Default values for the "gs" stream-context options. When the stream context
// contains a "gs" entry the constructor array_merge()s that entry over these
// defaults, so caller-supplied keys take precedence.
187 private static $default_gs_context_options = [
188 "enable_cache" => true,
189 "enable_optimistic_cache" => false,
190 "max_retries" => self
::DEFAULT_MAXIMUM_NUMBER_OF_RETRIES
,
191 "read_cache_expiry_seconds" => self
::DEFAULT_READ_CACHE_EXPIRY_SECONDS
,
192 "writable_cache_expiry_seconds" =>
193 self
::DEFAULT_WRITABLE_CACHE_EXPIRY_SECONDS
,
196 protected $bucket_name; // Name of the bucket for this object, as passed to the constructor.
197 protected $object_name; // The name of the object; null when the constructor's $object is omitted.
198 protected $context_options = []; // Default "gs" options combined with any supplied in the stream context.
199 protected $url; // GCS URL of the object, built by createObjectUrl() in the constructor.
200 protected $anonymous; // Use anonymous access when contacting GCS; read from $context_options.
203 * Construct an object of CloudStorageClient.
205 * @param string $bucket The name of the bucket.
206 * @param string $object The name of the object, or null if there is no
208 * @param resource $context The stream context to use.
// Stores the bucket and object names, resolves the effective "gs" context
// options, reads the anonymous-access setting from them and precomputes the
// object URL.
210 public function __construct($bucket, $object = null, $context = null) {
211 $this->bucket_name
= $bucket;
212 $this->object_name
= $object;
// No context supplied (null): fall back to PHP's default stream context.
213 if (!isset($context)) {
214 $context = stream_context_get_default();
216 $context_array = stream_context_get_options($context);
// Options under the "gs" key are merged over the class defaults; with
// array_merge() the later array wins for duplicate string keys.
217 if (array_key_exists("gs", $context_array)) {
218 $this->context_options
= array_merge(self
::$default_gs_context_options,
219 $context_array["gs"]);
// Plain defaults - presumably the branch used when the context has no "gs"
// entry.
221 $this->context_options
= self
::$default_gs_context_options;
// NOTE(review): the helper name suggests $anonymous is null when the option
// is absent - confirm against ArrayUtil::findByKeyOrNull().
223 $this->anonymous
= ArrayUtil
::findByKeyOrNull($this->context_options
,
226 $this->url
= $this->createObjectUrl($bucket, $object);
229 public function __destruct() {
232 public function initialize() {
236 public function dir_readdir() {
240 public function dir_rewinddir() {
245 public function close() {
248 public function delete() {
252 public function eof() {
256 public function flush() {
260 public function read($count_bytes) {
264 public function seek($offset, $whence) {
268 public function stat() {
272 public function tell() {
276 public function write($data) {
281 * Subclass can override this method to return the metadata of the underlying
// Base implementation: raises a PHP error through trigger_error() (no level
// is passed, so PHP's default E_USER_NOTICE applies) whose message names the
// runtime class of the object.
284 public function getMetaData() {
285 trigger_error(sprintf("%s does not have metadata", get_class($this)));
290 * Subclass can override this method to return the MIME content type of the
291 * underlying GCS object.
// Base implementation: raises a PHP error through trigger_error() (no level
// is passed, so PHP's default E_USER_NOTICE applies) whose message names the
// runtime class of the object.
293 public function getContentType() {
294 trigger_error(sprintf("%s does not have content type", get_class($this)));
299 * Get the OAuth Token HTTP header for the supplied scope.
301 * @param $scopes mixed The scopes to acquire the token for.
303 * @return array The HTTP authorization header for the scopes, using the
304 * applications service account. False if the call failed.
306 protected function getOAuthTokenHeader($scopes) {
// Anonymous access is checked first, before any token is requested.
307 if ($this->anonymous
) {
// Request an access token for $scopes from the App Identity service and
// format its 'access_token' field with OAUTH_TOKEN_FORMAT ("OAuth %s") as the
// value of a single "Authorization" header.
312 $token = AppIdentityService
::getAccessToken($scopes);
313 return ["Authorization" => sprintf(self
::OAUTH_TOKEN_FORMAT
,
314 $token['access_token'])];
// An AppIdentityException from the token request is caught here; the method's
// doc comment states that false is returned when the call fails.
315 } catch (AppIdentityException
$e) {
321 * Create a URL for a target bucket and optional object.
// Static helper; the constructor calls it to populate $this->url.
325 public static function createObjectUrl($bucket, $object = null) {
// $object is optional (defaults to null); the unset case is handled first.
326 if (!isset($object)) {
330 // Strip leading "/" for $object.
// Only the first character is dropped, so a single leading slash is removed.
331 if (StringUtil
::startsWith($object, "/")) {
332 $object = substr($object, 1);
// Build the Cloud Storage filename for the bucket/object pair and convert it
// to a URL. NOTE(review): the second argument (true) presumably requests an
// HTTPS URL - confirm against CloudStorageTools::getPublicUrl().
335 $gs_filename = CloudStorageTools
::getFilename($bucket, $object);
336 return CloudStorageTools
::getPublicUrl($gs_filename, true);
340 * Return a Range HTTP header.
342 * @param $start_byte int The offset of the first byte in the range.
343 * @param $end_byte int The offset of the last byte in the range.
345 * @return array The HTTP Range header for the supplied offsets.
347 protected function getRangeHeader($start_byte, $end_byte) {
// Precondition only: assert() is evaluated or skipped depending on PHP's
// assertion configuration, so an inverted range is not rejected in every
// deployment.
348 assert($start_byte <= $end_byte);
// Both offsets are formatted with %d and returned as a one-element array
// keyed by "Range", e.g. "bytes=0-99".
349 return ["Range" => sprintf("bytes=%d-%d", $start_byte, $end_byte)];
353 * Make a request to GCS using HttpStreams.
// Adds the API version header to the caller's headers, delegates the call to
// doHttpRequest() and returns an array with the keys 'status_code', 'headers'
// and 'body'.
359 protected function makeHttpRequest($url, $method, $headers, $body = null) {
// array_merge(): the API version header overrides any same-named key in
// $headers.
360 $request_headers = array_merge($headers, self
::$api_version_header);
362 $result = $this->doHttpRequest($url,
// A strict false from doHttpRequest() is handled separately before the result
// array is built.
367 if ($result === false) {
372 'status_code' => $result['status_code'],
373 'headers' => $result['headers'],
374 'body' => $result['body'],
379 * Return the value of a header stored in an associative array, using a case
380 * insensitive comparison on the header name.
382 * @param $header_name string The name of the header to lookup.
383 * @param $headers array Associative array of headers.
385 * @return The value of the header if found, false otherwise.
387 protected function getHeaderValue($header_name, $headers) {
// Linear scan over the header array; strcasecmp() returns 0 when the two
// names are equal ignoring case.
388 foreach($headers as $key => $value) {
389 if (strcasecmp($key, $header_name) === 0) {
// Issues a single logical HTTP request through the URLFetch API, retrying on
// transient failures, and returns an array with the keys 'status_code',
// 'headers' and 'body'. makeHttpRequest() additionally checks for a strict
// false result from this method.
399 private function doHttpRequest($url, $method, $headers, $body) {
400 $req = new \google\appengine\
URLFetchRequest();
// Translate the verb string into the URLFetch method enum.
402 $req->setMethod(self
::$request_map[$method]);
// Server certificates are always validated.
403 $req->setMustValidateServerCertificate(true);
405 $req->setPayload($body);
// Copy each key/value pair of $headers into the request.
408 foreach($headers as $key => $value) {
409 $h = $req->addHeader();
411 $h->setValue($value);
414 $resp = new \google\appengine\
URLFetchResponse();
// The loop has no condition of its own; $num_retries counts the attempts
// already made.
416 for ($num_retries = 0; ; $num_retries++
) {
418 ApiProxy
::makeSyncCall('urlfetch', 'Fetch', $req, $resp);
419 } catch (ApplicationError
$e) {
// Transient URLFetch errors are mapped to a retryable HTTP status.
420 if (in_array($e->getApplicationError(), self
::$retry_exception_codes)) {
421 // We need to set a plausible value in the URLFetchResponse proto in
422 // case the retry loop falls through - this will also cause a retry
423 // if one is available.
424 $resp->setStatusCode(HttpResponse
::GATEWAY_TIMEOUT
);
// A message including the application error code is formatted here.
427 sprintf("Call to URLFetch failed with application error %d " .
429 $e->getApplicationError(),
435 $status_code = $resp->getStatusCode();
// Retry only while attempts remain ('max_retries' context option), the status
// is in $retryable_statuses and the PHP connection has not timed out.
437 if ($num_retries < $this->context_options
['max_retries'] &&
438 in_array($status_code, self
::$retryable_statuses) &&
439 (connection_status() & CONNECTION_TIMEOUT
) == 0) {
// Randomised exponential back-off: sleep between 0 and 2^$num_retries seconds
// (usleep() takes microseconds).
440 usleep(rand(0, 1000000 * pow(2, $num_retries)));
// Check again for a connection timeout that occurred during the sleep.
441 if ((connection_status() & CONNECTION_TIMEOUT
) == CONNECTION_TIMEOUT
) {
// Flatten the response headers into an associative array; names and values
// are trimmed, and a repeated header name overwrites the earlier entry.
449 $response_headers = [];
450 foreach($resp->getHeaderList() as $header) {
451 // TODO: Do we need to support multiple headers with the same key?
452 $response_headers[trim($header->getKey())] = trim($header->getValue());
456 'status_code' => $resp->getStatusCode(),
457 'headers' => $response_headers,
458 'body' => $resp->getContent(),
463 * Generate the default stat() array, which is both associative and index
468 protected function createStatArray($stat_args) {
// The thirteen stat field names, in the order they are iterated below.
469 $stat_keys = ["dev", "ino", "mode", "nlink", "uid", "gid", "rdev", "size",
470 "atime", "mtime", "ctime", "blksize", "blocks"];
474 foreach ($stat_keys as $key) {
// Use the caller's value for this field when $stat_args provides one.
476 if (array_key_exists($key, $stat_args)) {
477 $value = $stat_args[$key];
479 // Add the associative entry.
480 $result[$key] = $value;
481 // Add the index entry.
489 * Extract metadata from HTTP response headers.
491 * Finds all headers that begin with METADATA_HEADER_PREFIX (x-goog-meta-),
492 * strips off the prefix, and creates an associative array.
494 * @param array $headers
495 * Associative array of HTTP headers.
497 * Array of parsed metadata headers.
499 protected static function extractMetaData(array $headers) {
501 foreach($headers as $key => $value) {
// The prefix test runs on the lower-cased header name, so it is
// case-insensitive. static:: resolves the constant on the called class.
502 if (StringUtil
::startsWith(strtolower($key),
503 static::METADATA_HEADER_PREFIX
)) {
// The stored key is cut from the original $key (not the lower-cased copy), so
// the casing of the part after the prefix is preserved.
504 $metadata_key = substr($key, strlen(static::METADATA_HEADER_PREFIX
));
505 $metadata[$metadata_key] = $value;
513 * Given an xml based error response from Cloud Storage, try and extract the
514 * error code and error message according to the schema described at
515 * https://developers.google.com/storage/docs/reference-status
517 * @param string $gcs_result The response body of the last call to Google
519 * @param string $code Reference variable where the error code for the last
520 * message will be returned.
521 * @param string $message Reference variable where the error detail for the
522 * last message will be returned.
523 * @return bool True if the error code and message could be extracted, false
526 protected function tryParseCloudStorageErrorMessage($gcs_result,
// Switch libxml to internal error handling while parsing so a malformed body
// does not emit PHP warnings; the previous setting is remembered and restored
// below.
532 $old_errors = libxml_use_internal_errors(true);
533 $xml = simplexml_load_string($gcs_result);
// Read the Code and Message elements of the parsed document as strings.
536 $code = (string) $xml->Code
;
537 $message = (string) $xml->Message
;
539 libxml_use_internal_errors($old_errors);
// NOTE(review): isset() is true for any non-null value, including the empty
// strings the casts above produce when an element is missing - confirm that
// treating such a response as successfully parsed is intended.
540 return (isset($code) && isset($message));
544 * Return a formatted error message for the http response.
546 * @param int $http_status_code The HTTP status code returned from the last
548 * @param string $http_result The response body from the last http request.
549 * @param string $msg_prefix The prefix to add to the error message that will
552 * @return string The error message for the last HTTP response.
554 protected function getErrorMessage($http_status_code,
556 $msg_prefix = "Cloud Storage Error:") {
// First try to extract a Cloud Storage error code and message from the
// response body.
557 if ($this->tryParseCloudStorageErrorMessage($http_result,
// Parsed: prefix, then the message, then the code in parentheses.
560 return sprintf("%s %s (%s)", $msg_prefix, $message, $code);
// Fallback: prefix followed by the text HttpResponse::getStatusMessage()
// gives for the HTTP status code.
562 return sprintf("%s %s",
564 HttpResponse
::getStatusMessage($http_status_code));