3 * Copyright 2007 Google Inc.
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
9 * http://www.apache.org/licenses/LICENSE-2.0
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
18 * CloudStorageClient is the base class for classes that are used to communicate
19 * with Google Cloud Storage via the PHP streams interface.
23 namespace google\appengine\ext\cloud_storage_streams
;
25 use google\appengine\api\app_identity\AppIdentityService
;
26 use google\appengine\api\app_identity\AppIdentityException
;
27 use google\appengine\api\cloud_storage\CloudStorageTools
;
28 use google\appengine\runtime\ApiProxy
;
29 use google\appengine\runtime\ApplicationError
;
30 use google\appengine\URLFetchRequest\RequestMethod
;
31 use google\appengine\util\ArrayUtil
;
34 * CloudStorageClient provides default fail implementations for all of the
35 * methods that the stream wrapper might potentially call. Derived classes then
36 * only implement the methods that are relevant to the operations that they
39 abstract class CloudStorageClient
{
40 // The default chunk size that we will read from the file. This value should
41 // remain smaller than the maximum object size valid for memcache writes so
42 // we can cache the reads.
43 const DEFAULT_READ_SIZE
= 524288;
45 // The default amount of time that reads will be held in the cache.
46 const DEFAULT_READ_CACHE_EXPIRY_SECONDS
= 3600; // one hour
48 // The default maximum number of times that certain (see retryable_statuses)
49 // failed Google Cloud Storage requests will be retried before returning
51 const DEFAULT_MAXIMUM_NUMBER_OF_RETRIES
= 2;
53 // The default time the writable state of a bucket will be cached for.
54 const DEFAULT_WRITABLE_CACHE_EXPIRY_SECONDS
= 600; // ten minutes
56 // Token scopes for accessing objects in Google Cloud Storage
57 const READ_SCOPE
= "https://www.googleapis.com/auth/devstorage.read_only";
58 const WRITE_SCOPE
= "https://www.googleapis.com/auth/devstorage.read_write";
59 const FULL_SCOPE
= "https://www.googleapis.com/auth/devstorage.full_control";
61 // Format for the OAuth token header.
62 const OAUTH_TOKEN_FORMAT
= "OAuth %s";
64 // Content Range Header format when the total length is unknown.
65 const PARTIAL_CONTENT_RANGE_FORMAT
= "bytes %d-%d/*";
67 // Content Range Header format when the length is known.
68 const FINAL_CONTENT_RANGE_FORMAT
= "bytes %d-%d/%d";
70 // Content Range Header for final chunk with no new data
71 const FINAL_CONTENT_RANGE_NO_DATA
= "bytes */%d";
73 // A character or multiple characters that can be used to simplify a list of
74 // objects that use a directory-like naming scheme. Can be used in conjunction
76 const DELIMITER
= '/';
78 // Cloud storage can append _$folder$ to an object name and have it behave
79 // like a regular file system folder.
80 const FOLDER_SUFFIX
= '_$folder$';
82 // Temporary file name we create when checking if a bucket is writable.
83 const WRITABLE_TEMP_FILENAME
= "/_ah_is_writable_temp_file";
85 // Bit fields for the stat mode field. All values below are octal literals
// (leading 0), matching the conventional POSIX st_mode layout.
86 const S_IFREG
= 0100000; // file type: regular file
87 const S_IFDIR
= 0040000; // file type: directory
89 const S_IRWXU
= 00700; // mask for owner permissions
90 const S_IRUSR
= 00400; // read for owner
91 const S_IWUSR
= 00200; // write for owner
92 const S_IXUSR
= 00100; // execute for owner
94 const S_IRWXG
= 00070; // mask for group permissions
95 const S_IRGRP
= 00040; // read for group
96 const S_IWGRP
= 00020; // write for group
97 const S_IXGRP
= 00010; // execute for group
99 const S_IRWXO
= 00007; // mask for other permissions
100 const S_IROTH
= 00004; // read for other
101 const S_IWOTH
= 00002; // write for other
102 const S_IXOTH
= 00001; // execute for other
104 // The API version header
105 private static $api_version_header = ["x-goog-api-version" => 2];
107 // Regex pattern for retrieving the length of the content being served.
108 const CONTENT_RANGE_REGEX
= "/bytes\s+(\d+)-(\d+)\/(\d+)/i";
111 * Memcache key format for caching the results of reads from GCS. The
112 * parameters are the object url (as a string) and the read range, as a
113 * string (e.g. bytes=0-512000).
114 * Example key for a cloud storage file gs://bucket/object.png
115 * _ah_gs_read_cache_https://storage.googleapis.com/bucket/object.png_bytes=0-524287
117 const MEMCACHE_KEY_FORMAT
= "_ah_gs_read_cache_%s_%s";
120 * Memcache key format for caching the results of checking if a bucket is
121 * writable. The only way to check if an app can write to a bucket is by
122 * actually writing a file. As the ACL on a bucket is unlikely to change
123 * then we can cache the result.
125 const WRITABLE_MEMCACHE_KEY_FORMAT
= "_ah_gs_write_bucket_cache_%s";
127 // HTTP status codes that should be retried if they are returned by a request
128 // to GCS. Retry should occur with a random exponential back-off.
129 protected static $retry_error_codes = [HttpResponse
::REQUEST_TIMEOUT
,
130 HttpResponse
::INTERNAL_SERVER_ERROR
,
131 HttpResponse
::BAD_GATEWAY
,
132 HttpResponse
::SERVICE_UNAVAILABLE
,
133 HttpResponse
::GATEWAY_TIMEOUT
];
135 // Values that are allowed to be supplied as ACLs when writing objects.
136 protected static $valid_acl_values = ["private",
139 "authenticated-read",
141 "bucket-owner-full-control"];
143 protected static $upload_start_header = ["x-goog-resumable" => "start"];
145 // Map HTTP request types to URLFetch method enum.
146 private static $request_map = [
147 "GET" => RequestMethod
::GET
,
148 "POST" => RequestMethod
::POST
,
149 "HEAD" => RequestMethod
::HEAD
,
150 "PUT" => RequestMethod
::PUT
,
151 "DELETE" => RequestMethod
::DELETE
,
152 "PATCH" => RequestMethod
::PATCH
// HTTP status codes that doHttpRequest() treats as retryable.
// NOTE(review): $retry_error_codes appears to list the same statuses via
// HttpResponse constants - confirm whether both lists are needed.
155 private static $retryable_statuses = [
156 408, // Request Timeout
157 500, // Internal Server Error
159 503, // Service Unavailable
160 504, // Gateway Timeout
163 private static $default_gs_context_options = [
164 "enable_cache" => true,
165 "enable_optimistic_cache" => false,
166 "max_retries" => self
::DEFAULT_MAXIMUM_NUMBER_OF_RETRIES
,
167 "read_cache_expiry_seconds" => self
::DEFAULT_READ_CACHE_EXPIRY_SECONDS
,
168 "writable_cache_expiry_seconds" =>
169 self
::DEFAULT_WRITABLE_CACHE_EXPIRY_SECONDS
,
172 protected $bucket_name; // Name of the bucket for this object.
173 protected $object_name; // The name of the object.
174 protected $context_options = []; // Any context arguments supplied on open.
175 protected $url; // GCS URL of the object.
176 protected $anonymous; // Use anonymous access when contacting GCS.
179 * Construct an object of CloudStorageClient.
181 * @param string $bucket The name of the bucket.
182 * @param string $object The name of the object, or null if there is no
184 * @param resource $context The stream context to use.
186 public function __construct($bucket, $object = null, $context = null) {
187 $this->bucket_name
= $bucket;
188 $this->object_name
= $object;
// Fall back to PHP's default stream context when the caller supplied none.
189 if (!isset($context)) {
190 $context = stream_context_get_default();
192 $context_array = stream_context_get_options($context);
// Options under the "gs" wrapper key are merged over the class defaults;
// with string keys, array_merge lets the caller's value win.
193 if (array_key_exists("gs", $context_array)) {
194 $this->context_options
= array_merge(self
::$default_gs_context_options,
195 $context_array["gs"]);
// Use the class defaults unchanged.
197 $this->context_options
= self
::$default_gs_context_options;
// The anonymous-access flag is read from the merged options; presumably null
// when the option is absent - confirm against ArrayUtil::findByKeyOrNull.
199 $this->anonymous
= ArrayUtil
::findByKeyOrNull($this->context_options
,
// Compute the object's GCS URL once, at construction time.
202 $this->url
= $this->createObjectUrl($bucket, $object);
205 public function __destruct() {
208 public function initialize() {
212 public function dir_readdir() {
216 public function dir_rewinddir() {
221 public function close() {
224 public function delete() {
228 public function eof() {
232 public function flush() {
236 public function read($count_bytes) {
240 public function seek($offset, $whence) {
244 public function stat() {
248 public function tell() {
252 public function write($data) {
257 * Get the OAuth Token HTTP header for the supplied scope.
259 * @param $scopes mixed The scopes to acquire the token for.
261 * @return array The HTTP authorization header for the scopes, using the
262 * applications service account. False if the call failed.
264 protected function getOAuthTokenHeader($scopes) {
// Anonymous mode is checked before any token is requested; presumably no
// Authorization header is produced in that case - TODO confirm.
265 if ($this->anonymous
) {
// Request an access token for $scopes from App Identity and wrap it as an
// "OAuth <token>" Authorization header (see OAUTH_TOKEN_FORMAT).
270 $token = AppIdentityService
::getAccessToken($scopes);
271 return ["Authorization" => sprintf(self
::OAUTH_TOKEN_FORMAT
,
272 $token['access_token'])];
// App Identity failures are caught here rather than propagated; the method
// documentation states that false is returned when the call fails.
273 } catch (AppIdentityException
$e) {
279 * Create a URL for a target bucket and optional object.
283 public static function createObjectUrl($bucket, $object = null) {
284 // Strip leading "/" for $object
// NOTE(review): isset() is true for an empty string, so $object[0] would read
// an undefined string offset when $object === "" - confirm callers never
// pass an empty object name.
285 if (isset($object) && $object[0] == "/") {
286 $object_name = substr($object, 1);
// Build the filename from bucket and object name, then convert it to a
// public URL. The second argument (true) presumably selects HTTPS - confirm
// against CloudStorageTools::getPublicUrl.
291 $gs_filename = CloudStorageTools
::getFilename($bucket, $object_name);
292 return CloudStorageTools
::getPublicUrl($gs_filename, true);
296 * Return a Range HTTP header.
298 * @param $start_byte int The offset of the first byte in the range.
299 * @param $end_byte int The offset of the last byte in the range.
301 * @return array The HTTP Range header for the supplied offsets.
303 protected function getRangeHeader($start_byte, $end_byte) {
// Sanity check only: PHP assertions can be disabled by configuration, in
// which case an inverted range is not rejected here.
304 assert($start_byte <= $end_byte);
// Both offsets are formatted with %d, i.e. rendered as integers.
305 return ["Range" => sprintf("bytes=%d-%d", $start_byte, $end_byte)];
309 * Make a request to GCS using HttpStreams.
315 protected function makeHttpRequest($url, $method, $headers, $body = null) {
// The API version header is merged last, so it replaces any caller-supplied
// entry that uses the identical "x-goog-api-version" key.
316 $request_headers = array_merge($headers, self
::$api_version_header);
318 $result = $this->doHttpRequest($url,
// A strict false from doHttpRequest is handled separately from a response
// array.
323 if ($result === false) {
// Repackage the three response fields for the caller.
328 'status_code' => $result['status_code'],
329 'headers' => $result['headers'],
330 'body' => $result['body'],
335 * Return the value of a header stored in an associative array, using a case
336 * insensitive comparison on the header name.
338 * @param $header_name string The name of the header to lookup.
339 * @param $headers array Associative array of headers.
341 * @return The value of the header if found, false otherwise.
343 protected function getHeaderValue($header_name, $headers) {
344 // Could be more than one header, in which case we keep an array.
345 foreach($headers as $key => $value) {
// strcasecmp() returns 0 when the two names are equal ignoring case, so
// "Content-Range" and "content-range" both match.
346 if (strcasecmp($key, $header_name) === 0) {
// Issues the request through the App Engine URLFetch service on behalf of
// makeHttpRequest(), retrying retryable HTTP statuses with a random back-off,
// and returns the status code, headers and body of the response.
356 private function doHttpRequest($url, $method, $headers, $body) {
357 $req = new \google\appengine\
URLFetchRequest();
// Translate the HTTP verb string into the URLFetch method enum.
// NOTE(review): a verb missing from $request_map is an undefined array key
// here - confirm callers only pass mapped verbs.
359 $req->setMethod(self
::$request_map[$method]);
// Always require a valid server certificate.
360 $req->setMustValidateServerCertificate(true);
362 $req->setPayload($body);
// Copy each header name/value pair into the request.
365 foreach($headers as $key => $value) {
366 $h = $req->addHeader();
368 $h->setValue($value);
371 $resp = new \google\appengine\
URLFetchResponse();
// The loop has no condition: it only ends through an explicit exit inside
// the body. $num_retries counts the attempts made so far.
373 for ($num_retries = 0; ; $num_retries++
) {
// Synchronous URLFetch RPC; an ApplicationError is caught below and its
// numeric application error code is formatted into the failure message.
375 ApiProxy
::makeSyncCall('urlfetch', 'Fetch', $req, $resp);
376 } catch (ApplicationError
$e) {
378 sprintf("Call to URLFetch failed with application error %d.",
379 $e->getApplicationError()));
382 $status_code = $resp->getStatusCode();
// Retry only while the attempt count is below the "max_retries" context
// option, the status is in $retryable_statuses, and the PHP request itself
// has not timed out.
384 if ($num_retries < $this->context_options
['max_retries'] &&
385 in_array($status_code, self
::$retryable_statuses) &&
386 (connection_status() & CONNECTION_TIMEOUT
) == 0) {
// Random back-off: usleep() takes microseconds, so this sleeps between 0 and
// 2^$num_retries seconds.
387 usleep(rand(0, 1000000 * pow(2, $num_retries)));
// Re-check for a request timeout that occurred during the sleep.
388 if ((connection_status() & CONNECTION_TIMEOUT
) == CONNECTION_TIMEOUT
) {
// Flatten the response headers into an associative array with trimmed keys
// and values; a repeated header key keeps only its last value.
396 $response_headers = [];
397 foreach($resp->getHeaderList() as $header) {
398 // TODO: Do we need to support multiple headers with the same key?
399 $response_headers[trim($header->getKey())] = trim($header->getValue());
403 'status_code' => $resp->getStatusCode(),
404 'headers' => $response_headers,
405 'body' => $resp->getContent(),
410 * Generate the default stat() array, which is both associative and index
415 protected function createStatArray($stat_args) {
// The 13 keys are listed in the order PHP's stat() uses for its numeric
// indexes 0-12.
416 $stat_keys = ["dev", "ino", "mode", "nlink", "uid", "gid", "rdev", "size",
417 "atime", "mtime", "ctime", "blksize", "blocks"];
421 foreach ($stat_keys as $key) {
// A caller-supplied value is used whenever the key is present in $stat_args.
423 if (array_key_exists($key, $stat_args)) {
424 $value = $stat_args[$key];
426 // Add the associative entry.
427 $result[$key] = $value;
428 // Add the index entry.
436 * Given an xml based error response from Cloud Storage, try and extract the
437 * error code and error message according to the schema described at
438 * https://developers.google.com/storage/docs/reference-status
440 * @param string $gcs_result The response body of the last call to Google
442 * @param string $code Reference variable where the error code for the last
443 * message will be returned.
444 * @param string $message Reference variable where the error detail for the
445 * last message will be returned.
446 * @return bool True if the error code and message could be extracted, false
449 protected function tryParseCloudStorageErrorMessage($gcs_result,
// Switch libxml to internal error collection so a malformed body does not
// emit PHP warnings; the previous setting is kept so it can be restored.
455 $old_errors = libxml_use_internal_errors(true);
456 $xml = simplexml_load_string($gcs_result);
// NOTE(review): the (string) casts produce "" for a missing element, so once
// these assignments run the isset() check below is true even when the XML
// has no Code or Message element - confirm this is intended.
459 $code = (string) $xml->Code
;
460 $message = (string) $xml->Message
;
// Restore the caller's libxml error-handling mode.
462 libxml_use_internal_errors($old_errors);
463 return (isset($code) && isset($message));
467 * Return a formatted error message for the http response.
469 * @param int $http_status_code The HTTP status code returned from the last
471 * @param string $http_result The response body from the last http request.
472 * @param string $msg_prefix The prefix to add to the error message that will
475 * @return string The error message for the last HTTP response.
477 protected function getErrorMessage($http_status_code,
479 $msg_prefix = "Cloud Storage Error:") {
// Prefer the error detail parsed out of the response body.
480 if ($this->tryParseCloudStorageErrorMessage($http_result,
// Parsed form: "<prefix> <message> (<code>)".
483 return sprintf("%s %s (%s)", $msg_prefix, $message, $code);
// Otherwise fall back to the generic status text for the HTTP status code.
485 return sprintf("%s %s",
487 HttpResponse
::getStatusMessage($http_status_code));