From 938a9ebe273bb2bc69e2507ae5fbb619c90c7968 Mon Sep 17 00:00:00 2001 From: "Kyle J. McKay" Date: Tue, 18 Aug 2015 15:34:31 -0700 Subject: [PATCH] girocco: support fetching bundles Signed-off-by: Kyle J. McKay --- Girocco/Config.pm | 10 +++ Girocco/Util.pm | 1 + apache.conf.in | 41 +++++++++--- bin/git-http-backend-verify | 150 +++++++++++++++++++++++++++++++++++++++----- src/rangecgi.c | 131 +++++++++++++++++++++++++------------- 5 files changed, 264 insertions(+), 69 deletions(-) diff --git a/Girocco/Config.pm b/Girocco/Config.pm index 71cc04e..87aa668 100644 --- a/Girocco/Config.pm +++ b/Girocco/Config.pm @@ -425,6 +425,16 @@ our $htmlurl = "http://repo.or.cz/h"; # that send a User-Agent string containing "git/" (case insensitively). our $httppullurl = "http://repo.or.cz/r"; +# HTTP URL of the repository collection when fetching a bundle (undef if N/A) +# Normally this will be the same as $httppullurl, but note that the bundle +# fetching logic is located in git-http-backend-verify so whatever URL is +# given here MUST end up running the git-http-backend-verify script! +# For example, if we're fetching the 'clone.bundle' for the 'girocco.git' +# repository, the final URL will be "$httpbundleurl/girocco.git/clone.bundle" +# If mod_rewrite is enabled and the sample apache.conf configuration is used +# (with paths suitably updated), the trailing "/r" is optional for all clients. 
+our $httpbundleurl = "http://repo.or.cz/r"; + # HTTPS push URL of the repository collection (undef if N/A) # If this is defined, the openssl command must be available # The sample apache.conf configuration requires mod_rewrite be enabled to diff --git a/Girocco/Util.pm b/Girocco/Util.pm index cc43973..a816e1a 100644 --- a/Girocco/Util.pm +++ b/Girocco/Util.pm @@ -344,6 +344,7 @@ sub is_our_hostname { $Girocco::Config::webadmurl, $Girocco::Config::htmlurl, $Girocco::Config::httppullurl, + $Girocco::Config::httpbundleurl, $Girocco::Config::httpspushurl, $Girocco::Config::gitpullurl, $Girocco::Config::pushurl diff --git a/apache.conf.in b/apache.conf.in index d3b6511..88bdd24 100644 --- a/apache.conf.in +++ b/apache.conf.in @@ -49,15 +49,16 @@ RewriteCond %{HTTP_USER_AGENT} !git/ [NC] RewriteCond @@reporoot@@/$1.git/HEAD -f RewriteRule \ - ^/(?!w/)((?:[a-zA-Z0-9+._-]+(? @@ -187,7 +188,7 @@ RewriteEngine On RewriteCond %{HTTP_USER_AGENT} git/ [NC] - RewriteRule "(?x) ^/((?!r/)[^_].*/objects/(?: \ + RewriteRule "(?x) ^/((?![bchrw]/)[^_].*/objects/(?: \ (?:[0-9a-f]{2}/[0-9a-f]{38}) | \ (?:pack/pack-[0-9a-f]{40}.(?:pack|idx)) ))$" \ @@reporoot@@/$1 [L] @@ -198,30 +199,50 @@ # Disable non-smart HTTP access RewriteEngine On RewriteCond %{REQUEST_METHOD} !^POST$ + RewriteCond %{REQUEST_URI} !/[a-zA-Z0-9+._-]+\.bundle$ RewriteRule ^/r/.*(? 
# SetEnv GIT_HTTP_BACKEND_BIN to override Config.pm $git_http_backend_bin # git-http-backend-verify denies all access to projects starting with '_' ScriptAlias /r/ @@basedir@@/bin/git-http-backend-verify/ - # This allows HTTP access for Git user agents without the /r/ prefix RewriteEngine On - RewriteCond %{REQUEST_URI} !^/authrequired[.]cgi$ + + # This allows HTTP access for Git user agents + # without the leading /r/ prefix RewriteCond %{HTTP_USER_AGENT} git/ [NC] - RewriteRule ^/(?!r/)(.*)$ \ + RewriteCond %{REQUEST_URI} !^/authrequired[.]cgi$ + RewriteRule ^/(?![bchrw]/)(.*)$ \ + @@basedir@@/bin/git-http-backend-verify/$1 \ + [L,H=cgi-script] + + # ...and this for access by all agents to *.bundle + # files without the /r/ prefix for names ending in .git + RewriteRule \ + ^/(?![bchrw]/)((?:[a-zA-Z0-9+._-]+(? # ---- END LINES TO DUPLICATE ---- diff --git a/bin/git-http-backend-verify b/bin/git-http-backend-verify index 5b1ebf3..574a324 100755 --- a/bin/git-http-backend-verify +++ b/bin/git-http-backend-verify @@ -14,6 +14,8 @@ # # Also prevents standard error output from git-http-backend cluttering up the # server's log unless GIT_HTTP_BACKEND_SHOW_ERRORS is set to a non-empty value. +# +# Bundle fetches are handled in this script as well. set -e @@ -56,17 +58,23 @@ export GIT_HTTP_EXPORT_ALL # exist under $cfg_reporoot. Non-smart HTTP fetch requests (GET or HEAD) are # passed on unchanged and unchecked. 
-errorhdrs() +errorhdrsct() { + _ct="$1"; shift printf '%s\r\n' "Status: $1 $2" printf '%s\r\n' "Expires: Fri, 01 Jan 1980 00:00:00 GMT" printf '%s\r\n' "Pragma: no-cache" - printf '%s\r\n' "Cache-Control: no-cache, max-age=0, must-revalidate" + printf '%s\r\n' "Cache-Control: no-cache,max-age=0,must-revalidate" [ -z "$3" ] || printf '%s\r\n' "$3" - printf '%s\r\n' "Content-Type: text/plain" + printf '%s\r\n' "Content-Type: $_ct" printf '\r\n' } +errorhdrs() +{ + errorhdrsct 'text/plain' "$@" +} + msglines() { while [ $# -gt 0 ]; do @@ -113,6 +121,17 @@ forbidden() exit 0 } +notfound() +{ + errorhdrs 404 "Not Found" + if [ $# -eq 0 ]; then + msglines "Not Found" + else + msglines "$@" + fi + exit 0 +} + needsauth() { errorhdrs 401 "Authorization Required" @@ -124,6 +143,33 @@ needsauth() exit 0 } +# Single argument is an absolute PATH (NOT a URI) to 302 redirect to +# The appropriate http pull URL path prefix is automatically inserted +redir() +{ + _pullurl="$cfg_httpbundleurl" + [ -n "$_pullurl" ] || _pullurl="$cfg_httpspushurl" + [ -n "$_pullurl" ] || _pullurl="$cfg_httppullurl" + _absbase="${_pullurl%/}/" + _absbase="${absbase##*://}" + _absbase="${absbase##*/}" + [ -z "$_absbase" ] || _absbase="/$_absbase" + _loc="https" + [ "$HTTPS" = "on" ] || _loc="http" + _loc="$_loc://$SERVER_NAME" + [ "$HTTPS" != "on" -o "$SERVER_PORT" = "443" ] || _loc="$_loc:$SERVER_PORT" + [ "$HTTPS" = "on" -o "$SERVER_PORT" = "80" ] || _loc="$_loc:$SERVER_PORT" + _loc="$_loc$_absbase" + case "$1" in /*) :;; *) _loc="$_loc/";; esac + _loc="$_loc$1" + errorhdrsct 'text/html' 302 "Found" "Location: $_loc" + if [ "$REQUEST_METHOD" != "HEAD" ]; then + printf '

Temporarily redirected to %s

\n' \ + "$_loc" "$_loc" + fi + exit 0 +} + # A quick sanity check if [ -z "$cfg_git_http_backend_bin" ] || ! [ -x "$cfg_git_http_backend_bin" ]; then internalerr "bad cfg_git_http_backend_bin: $cfg_git_http_backend_bin" @@ -138,8 +184,12 @@ esac PATH="$(dirname "$cfg_git_http_backend_bin"):$PATH" export PATH +digit='[0-9]' +digit6="$digit$digit$digit$digit$digit$digit" +digit8="$digit6$digit$digit" proj= smart= +bundle= suffix= needsauthcheck= pathcheck="${PATH_INFO#/}" @@ -155,19 +205,29 @@ if [ "$REQUEST_METHOD" = "GET" -o "$REQUEST_METHOD" = "HEAD" ]; then # /objects/[0-9a-f]{2}/[0-9a-f]{38} # /objects/pack/pack-[0-9a-f]{40}.idx # /objects/pack/pack-[0-9a-f]{40}.pack - case "$pathcheck" in *"/info/refs") - proj="${pathcheck%/info/refs}" - case "&$QUERY_STRING&" in - *"&service=git-receive-pack&"*) - smart=1 - needsauthcheck=1 - suffix=info/refs + # We do, however, need to recognize a /*.bundle fetch so that + # we can properly handle it. + case "$pathcheck" in + *"/info/refs") + proj="${pathcheck%/info/refs}" + case "&$QUERY_STRING&" in + *"&service=git-receive-pack&"*) + smart=1 + needsauthcheck=1 + suffix=info/refs + ;; + *"&service=git-upload-pack&"*) + smart=1 + suffix=info/refs + ;; + esac ;; - *"&service=git-upload-pack&"*) + */*[!./].bundle) + bundle=1 smart=1 - suffix=info/refs + proj="${pathcheck%/*.bundle}" + suffix="${pathcheck#$proj/}" ;; - esac esac elif [ "$REQUEST_METHOD" = "POST" ]; then case "$pathcheck" in @@ -205,6 +265,7 @@ if [ -n "$smart" ]; then proj="$proj.git" esac + projbare="${proj%.git}" reporoot="$cfg_reporoot" dir="$reporoot/$proj" @@ -228,6 +289,67 @@ if [ -n "$smart" ]; then fi fi +if [ -n "$bundle" ]; then + # We support two kinds of bundles: + # 1) /path/to/foo.git/clone.bundle + # 2) /path/to/foo.git/foo-????????.bundle + # The first ALWAYS returns a 302 or 404 response + # The second ALWAYS returns a 404 or success + isredir= + projbase="${projbare##*/}" + case "$suffix" in + "clone.bundle") + isredir=1 + ;; + 
"$projbase-"$octet4".bundle") + :;; + *) + forbidden + exit 1 + esac + if [ -n "$isredir" ]; then + # A bundles/latest symlink must exist and + # point to an existing file in the same directory + # matching the magic format (\d{8}_\d{6}-$octet4) + if ! [ -L "$dir/bundles/latest" -a -f "$dir/bundles/latest" ]; then + notfound + exit 0 + fi + linked="$(readlink "$dir/bundles/latest")" || { notfound; exit 0; } + case "$linked" in ${digit8}_$digit6-$octet4) :;; *) + notfound + exit 0 + esac + linked="$projbase-${linked#????????_??????-}" + redir "/$proj/$linked.bundle" + exit 0 + fi + bundleid="${suffix%.bundle}" + bundleid="${bundleid##*-}" + bundlepat="${digit8}_$digit6-$bundleid" + bundlefile="$(echo "$dir/bundles/"$bundlepat 2>/dev/null || :)" + if [ "$dir/bundles/$bundlepat" = "$bundlefile" ] || ! [ -f "$bundlefile" ]; then + notfound + exit 0 + fi + { + read -r bundlehdr || : + read -r bundlepck || : + } <"$bundlefile" + [ -n "$bundlehdr" -a -n "$bundlepck" ] || { notfound; exit 0; } + # Non-absolute paths are relative to the repository's objects/pack dir + case "$bundlehdr" in /*) :;; *) + bundlehdr="$dir/objects/pack/$bundlehdr" + esac + case "$bundlepck" in /*) :;; *) + bundlepck="$dir/objects/pack/$bundlepck" + esac + exec "$cfg_basedir/bin/rangecgi" -c 'application/x-git-bundle' -e 180 \ + "$bundlehdr" "$bundlepck" + internalerr "exec failed: $cfg_basedir/bin/rangecgi" + exit 1 +fi + if [ -z "$needsauthcheck" ] || [ -z "$smart" ]; then if [ -n "$GIT_HTTP_BACKEND_SHOW_ERRORS" ]; then exec "$cfg_git_http_backend_bin" "$@" @@ -238,8 +360,6 @@ if [ -z "$needsauthcheck" ] || [ -z "$smart" ]; then exit 1 fi -projbare="${proj%.git}" - if ! [ -f "$dir/.nofetch" ]; then forbidden "The $proj project is a mirror and may not be pushed to, sorry" exit 1 diff --git a/src/rangecgi.c b/src/rangecgi.c index 54bca68..60c30a9 100644 --- a/src/rangecgi.c +++ b/src/rangecgi.c @@ -26,13 +26,18 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
Only GET and HEAD requests are supported with either no "Range:" header or a "Range:" header with exactly one range. - USAGE: rangecgi [--etag] file1 file2 + USAGE: rangecgi ([--etag] | [-c <content-type>] [-e <days>]) file1 file2 If --etag is given then all environment variables are ignored and the computed ETag value (with the "", but without the "ETag:" prefix part) is output to standard output on success. Otherwise there is no output and the exit code will be non-zero. + If --etag is given then no other options are allowed. If -c <content-type> + is given then the specified content type will be used as-is for the returned + item. If -e <days> is given then a cache-control and expires header will + be output with the expiration set that many days into the future. + Other CGI parameters MUST be passed as environment variables in particular REQUEST_METHOD MUST be set and to request a range, HTTP_RANGE MUST be set. HTTP_IF_RANGE MAY be set. No other environment variables are examined. @@ -95,7 +100,7 @@ static void errorexit_(unsigned code, const char *status, const char *extrahdr) printf("Status: %u %s\r\n", code, status); printf("%s\r\n", "Expires: Fri, 01 Jan 1980 00:00:00 GMT"); printf("%s\r\n", "Pragma: no-cache"); - printf("%s\r\n", "Cache-Control: no-cache, max-age=0, must-revalidate"); + printf("%s\r\n", "Cache-Control: no-cache,max-age=0,must-revalidate"); printf("%s\r\n", "Accept-Ranges: bytes"); if (extrahdr) printf("%s\r\n", extrahdr); @@ -106,13 +111,12 @@ static void errorexit_(unsigned code, const char *status, const char *extrahdr) exit(0); } -static void emithdrs(time_t lm, const char *etag, bignum tl, int isr, bignum r1, bignum r2) +static void emithdrs(const char *ct, int exp, time_t lm, const char *etag, + bignum tl, int isr, bignum r1, bignum r2) { struct tm gt; - char lmstr[32]; + char dtstr[32]; - gt = *gmtime(&lm); - strftime(lmstr, sizeof(lmstr), "%a, %d %b %Y %H:%M:%S GMT", >); if (isr) if (isr > 0) printf("Status: %u %s\r\n", 206, "Partial Content"); @@ -120,8 +124,26 @@ static 
void emithdrs(time_t lm, const char *etag, bignum tl, int isr, bignum r1, printf("Status: %u %s\r\n", 416, "Requested Range Not Satisfiable"); else printf("Status: %u %s\r\n", 200, "OK"); + if (exp > 0) { + time_t epsecs = time(NULL); + long esecs = 86400 * exp; + gt = *gmtime(&epsecs); + strftime(dtstr, sizeof(dtstr), "%a, %d %b %Y %H:%M:%S GMT", >); + printf("Date: %s\r\n", dtstr); + epsecs += esecs; + gt = *gmtime(&epsecs); + strftime(dtstr, sizeof(dtstr), "%a, %d %b %Y %H:%M:%S GMT", >); + printf("Expires: %s\r\n", dtstr); + printf("Cache-Control: public,max-age=%ld\r\n", esecs); + } else if (!exp) { + printf("%s\r\n", "Expires: Fri, 01 Jan 1980 00:00:00 GMT"); + printf("%s\r\n", "Pragma: no-cache"); + printf("%s\r\n", "Cache-Control: no-cache,max-age=0,must-revalidate"); + } printf("%s\r\n", "Accept-Ranges: bytes"); - printf("Last-Modified: %s\r\n", lmstr); + gt = *gmtime(&lm); + strftime(dtstr, sizeof(dtstr), "%a, %d %b %Y %H:%M:%S GMT", >); + printf("Last-Modified: %s\r\n", dtstr); if (etag) printf("ETag: %s\r\n", etag); if (!isr) { @@ -132,17 +154,21 @@ static void emithdrs(time_t lm, const char *etag, bignum tl, int isr, bignum r1, } else { printf("Content-Range: bytes */%llu\r\n", tl); } - if (isr >= 0) - printf("%s\r\n", "Content-Type: application/octet-stream"); - else + if (isr >= 0) { + if (!ct || !*ct) + ct = "application/octet-stream"; + printf("Content-Type: %s\r\n", ct); + printf("%s\r\n", "Vary: Accept-Encoding"); + } else { printf("%s\r\n%s\n", "Content-Type: text/plain", "Requested Range Not Satisfiable"); + } printf("%s\r\n", ""); } static void error416(time_t lm, const char *etag, bignum tl) { - emithdrs(lm, etag, tl, -1, 0, 0); + emithdrs(NULL, -1, lm, etag, tl, -1, 0, 0); fflush(stdout); exit(0); } @@ -226,34 +252,55 @@ void dumpfile(int fd, bignum start, bignum len) int main(int argc, char *argv[]) { + int isetag = argc == 4 && !strcmp(argv[1], "--etag"); void (*errorexit)(unsigned,const char *,const char *) = - argc == 3 ? 
errorexit_ : errorfail_; + isetag ? errorfail_ : errorexit_; statrec f1, f2; int e1, e2, i=1; bignum l1, l2, tl; bignum r1=0, r2=0; bignum start, length; time_t lm; - const char *rm = argc == 3 ? getenv("REQUEST_METHOD") : NULL; - const char *hr = argc == 3 ? getenv("HTTP_RANGE") : NULL; + const char *rm = !isetag ? getenv("REQUEST_METHOD") : NULL; + const char *hr = !isetag ? getenv("HTTP_RANGE") : NULL; const char *hir = hr ? getenv("HTTP_IF_RANGE") : NULL; + const char *ct = NULL; + int expdays = -1; /* "inode_inode-size-time_t_micros" each in hex up to 8 bytes gives */ /* "16bytes_16bytes-16bytes-16bytes" plus NUL = 70 bytes (including "") */ char etag[70]; int fd1 = -1, fd2 = -1; - if (argc == 3 && (!rm || !*rm)) - exit(1); - - if (argc < 3 || argc > 4) - exit(2); - if (argc == 4) { - if (strcmp(argv[1], "--etag")) - exit(2); + if (isetag) { i = 2; + } else { + int ch; + opterr = 0; + while ((ch = getopt(argc, argv, "c:e:")) != -1) { + switch(ch) { + case 'c': + ct = optarg; + break; + case 'e': + { + int v, n; + if (sscanf(optarg, "%i%n", &v, &n) != 1 || n != (int)strlen(optarg)) + exit(2); + expdays = v; + break; + } + default: + exit(2); + } + } + if (argc - optind != 2) + exit(2); + if (!rm || !*rm) + exit(1); + i = optind; } - if (argc == 3 && strcmp(rm, "GET") && strcmp(rm, "HEAD")) + if (!isetag && strcmp(rm, "GET") && strcmp(rm, "HEAD")) errorexit(405, "Method Not Allowed", "Allow: GET,HEAD"); fd1 = open(argv[i], O_RDONLY); @@ -280,7 +327,7 @@ int main(int argc, char *argv[]) sprintf(etag, "\"%llx_%llx-%llx-%llx\"", (unsigned long long)f1.st_ino, (unsigned long long)f2.st_ino, tl, (unsigned long long)lm * 1000000U); - if (argc == 4) { + if (isetag) { close(fd2); close(fd1); printf("%s\n", etag); @@ -341,28 +388,24 @@ int main(int argc, char *argv[]) length = tl; } - if (!strcmp(rm, "HEAD")) { - emithdrs(lm, etag, tl, hr?1:0, r1, r2); - fflush(stdout); - exit(0); - } - - emithdrs(lm, etag, tl, hr?1:0, r1, r2); + emithdrs(ct, expdays, lm, etag, tl, 
hr?1:0, r1, r2); fflush(stdout); - if (start < l1) { - bignum dl = l1 - start; - if (dl > length) dl = length; - dumpfile(fd1, start, dl); - start += dl; - length -= dl; - } - if (length && start >= l1) { - bignum dl; - start -= l1; - dl = l2 - start; - if (dl > length) dl = length; - dumpfile(fd2, start, dl); + if (strcmp(rm, "HEAD")) { + if (start < l1) { + bignum dl = l1 - start; + if (dl > length) dl = length; + dumpfile(fd1, start, dl); + start += dl; + length -= dl; + } + if (length && start >= l1) { + bignum dl; + start -= l1; + dl = l2 - start; + if (dl > length) dl = length; + dumpfile(fd2, start, dl); + } } close(fd2); -- 2.11.4.GIT