Merge branch 'jc/pack'
[git.git] / http-fetch.c
bloba113bb8c4b0c8d59b5b6df3c7689ba9f50818753
1 #include "cache.h"
2 #include "commit.h"
3 #include "pack.h"
4 #include "fetch.h"
5 #include "http.h"
7 #ifndef NO_EXPAT
8 #include <expat.h>
10 /* Definitions for DAV requests */
11 #define DAV_PROPFIND "PROPFIND"
12 #define DAV_PROPFIND_RESP ".multistatus.response"
13 #define DAV_PROPFIND_NAME ".multistatus.response.href"
14 #define DAV_PROPFIND_COLLECTION ".multistatus.response.propstat.prop.resourcetype.collection"
15 #define PROPFIND_ALL_REQUEST "<?xml version=\"1.0\" encoding=\"utf-8\" ?>\n<D:propfind xmlns:D=\"DAV:\">\n<D:allprop/>\n</D:propfind>"
17 /* Definitions for processing XML DAV responses */
18 #ifndef XML_STATUS_OK
19 enum XML_Status {
20 XML_STATUS_OK = 1,
21 XML_STATUS_ERROR = 0
23 #define XML_STATUS_OK 1
24 #define XML_STATUS_ERROR 0
25 #endif
27 /* Flags that control remote_ls processing */
28 #define PROCESS_FILES (1u << 0)
29 #define PROCESS_DIRS (1u << 1)
30 #define RECURSIVE (1u << 2)
32 /* Flags that remote_ls passes to callback functions */
33 #define IS_DIR (1u << 0)
34 #endif
/* Bytes read per chunk when replaying a previously downloaded temp file */
#define PREV_BUF_SIZE 4096
/* Size of the stack buffers that hold a formatted "Range:" header */
#define RANGE_HEADER_SIZE 30

/* Non-zero when main() was given --stdin */
static int commits_on_stdin;

/*
 * State of the alternates list: -1 until a fetch is attempted (and again
 * after a failure), 0 while a fetch is in flight, 1 once it was processed.
 */
static int got_alternates = -1;
/* Incremented by fetch_object() for each loose object that fails to inflate */
static int corrupt_object_found;

/* Header list built in main() from the single entry "Pragma:" */
static struct curl_slist *no_pragma_header;
/*
 * One repository that objects may be fetched from: the URL given on the
 * command line, followed by any alternates discovered later.
 */
struct alt_base
{
	const char *base;		/* base URL of the repository */
	int path_len;			/* length of the path part of base (after the host) */
	int got_indices;		/* non-zero once the pack list has been processed */
	struct packed_git *packs;	/* packs whose index was fetched but not the pack itself */
	struct alt_base *next;		/* next repository to try */
};

/* Head of the repository list; allocated in main(). */
static struct alt_base *alt;
/* Life cycle of an entry in the object request queue. */
enum object_request_state {
	WAITING,	/* queued by prefetch(), transfer not started */
	ABORTED,	/* the transfer could not be started */
	ACTIVE,		/* handed to an active request slot */
	COMPLETE,	/* response processed, or object already present */
};
64 struct object_request
66 unsigned char sha1[20];
67 struct alt_base *repo;
68 char *url;
69 char filename[PATH_MAX];
70 char tmpfile[PATH_MAX];
71 int local;
72 enum object_request_state state;
73 CURLcode curl_result;
74 char errorstr[CURL_ERROR_SIZE];
75 long http_code;
76 unsigned char real_sha1[20];
77 SHA_CTX c;
78 z_stream stream;
79 int zret;
80 int rename;
81 struct active_request_slot *slot;
82 struct object_request *next;
/* Context handed to process_alternates_response() through the slot callback. */
struct alternates_request {
	const char *base;			/* repository whose alternates are fetched */
	char *url;				/* request URL buffer, rewritten for the fallback file */
	struct buffer *buffer;			/* receives the response body */
	struct active_request_slot *slot;	/* slot running the request */
	int http_specific;			/* 1 while http-alternates is tried, 0 for alternates */
};
#ifndef NO_EXPAT
/* Parser state shared by the expat callbacks in this file. */
struct xml_ctx
{
	char *name;	/* dotted path of the element currently open */
	int len;	/* size recorded for the name buffer */
	char *cdata;	/* most recent character data, or NULL */
	void (*userFunc)(struct xml_ctx *ctx, int tag_closed);	/* called on every tag open (0) and close (1) */
	void *userData;	/* opaque pointer for userFunc */
};

/* State of one remote directory listing performed by remote_ls(). */
struct remote_ls_ctx
{
	struct alt_base *repo;	/* repository being listed */
	char *path;		/* path that was requested */
	void (*userFunc)(struct remote_ls_ctx *ls);	/* called for each matching entry */
	void *userData;		/* opaque pointer for userFunc */
	int flags;		/* PROCESS_FILES / PROCESS_DIRS / RECURSIVE */
	char *dentry_name;	/* name of the entry currently being parsed */
	int dentry_flags;	/* IS_DIR when the entry is a collection */
	int rc;			/* result that remote_ls() returns */
	struct remote_ls_ctx *parent;
};
#endif
117 static struct object_request *object_queue_head;
119 static size_t fwrite_sha1_file(void *ptr, size_t eltsize, size_t nmemb,
120 void *data)
122 unsigned char expn[4096];
123 size_t size = eltsize * nmemb;
124 int posn = 0;
125 struct object_request *obj_req = (struct object_request *)data;
126 do {
127 ssize_t retval = write(obj_req->local,
128 (char *) ptr + posn, size - posn);
129 if (retval < 0)
130 return posn;
131 posn += retval;
132 } while (posn < size);
134 obj_req->stream.avail_in = size;
135 obj_req->stream.next_in = ptr;
136 do {
137 obj_req->stream.next_out = expn;
138 obj_req->stream.avail_out = sizeof(expn);
139 obj_req->zret = inflate(&obj_req->stream, Z_SYNC_FLUSH);
140 SHA1_Update(&obj_req->c, expn,
141 sizeof(expn) - obj_req->stream.avail_out);
142 } while (obj_req->stream.avail_in && obj_req->zret == Z_OK);
143 data_received++;
144 return size;
147 static void fetch_alternates(const char *base);
149 static void process_object_response(void *callback_data);
151 static void start_object_request(struct object_request *obj_req)
153 char *hex = sha1_to_hex(obj_req->sha1);
154 char prevfile[PATH_MAX];
155 char *url;
156 char *posn;
157 int prevlocal;
158 unsigned char prev_buf[PREV_BUF_SIZE];
159 ssize_t prev_read = 0;
160 long prev_posn = 0;
161 char range[RANGE_HEADER_SIZE];
162 struct curl_slist *range_header = NULL;
163 struct active_request_slot *slot;
165 snprintf(prevfile, sizeof(prevfile), "%s.prev", obj_req->filename);
166 unlink(prevfile);
167 rename(obj_req->tmpfile, prevfile);
168 unlink(obj_req->tmpfile);
170 if (obj_req->local != -1)
171 error("fd leakage in start: %d", obj_req->local);
172 obj_req->local = open(obj_req->tmpfile,
173 O_WRONLY | O_CREAT | O_EXCL, 0666);
174 /* This could have failed due to the "lazy directory creation";
175 * try to mkdir the last path component.
177 if (obj_req->local < 0 && errno == ENOENT) {
178 char *dir = strrchr(obj_req->tmpfile, '/');
179 if (dir) {
180 *dir = 0;
181 mkdir(obj_req->tmpfile, 0777);
182 *dir = '/';
184 obj_req->local = open(obj_req->tmpfile,
185 O_WRONLY | O_CREAT | O_EXCL, 0666);
188 if (obj_req->local < 0) {
189 obj_req->state = ABORTED;
190 error("Couldn't create temporary file %s for %s: %s",
191 obj_req->tmpfile, obj_req->filename, strerror(errno));
192 return;
195 memset(&obj_req->stream, 0, sizeof(obj_req->stream));
197 inflateInit(&obj_req->stream);
199 SHA1_Init(&obj_req->c);
201 url = xmalloc(strlen(obj_req->repo->base) + 50);
202 obj_req->url = xmalloc(strlen(obj_req->repo->base) + 50);
203 strcpy(url, obj_req->repo->base);
204 posn = url + strlen(obj_req->repo->base);
205 strcpy(posn, "objects/");
206 posn += 8;
207 memcpy(posn, hex, 2);
208 posn += 2;
209 *(posn++) = '/';
210 strcpy(posn, hex + 2);
211 strcpy(obj_req->url, url);
213 /* If a previous temp file is present, process what was already
214 fetched. */
215 prevlocal = open(prevfile, O_RDONLY);
216 if (prevlocal != -1) {
217 do {
218 prev_read = read(prevlocal, prev_buf, PREV_BUF_SIZE);
219 if (prev_read>0) {
220 if (fwrite_sha1_file(prev_buf,
222 prev_read,
223 obj_req) == prev_read) {
224 prev_posn += prev_read;
225 } else {
226 prev_read = -1;
229 } while (prev_read > 0);
230 close(prevlocal);
232 unlink(prevfile);
234 /* Reset inflate/SHA1 if there was an error reading the previous temp
235 file; also rewind to the beginning of the local file. */
236 if (prev_read == -1) {
237 memset(&obj_req->stream, 0, sizeof(obj_req->stream));
238 inflateInit(&obj_req->stream);
239 SHA1_Init(&obj_req->c);
240 if (prev_posn>0) {
241 prev_posn = 0;
242 lseek(obj_req->local, SEEK_SET, 0);
243 ftruncate(obj_req->local, 0);
247 slot = get_active_slot();
248 slot->callback_func = process_object_response;
249 slot->callback_data = obj_req;
250 obj_req->slot = slot;
252 curl_easy_setopt(slot->curl, CURLOPT_FILE, obj_req);
253 curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_sha1_file);
254 curl_easy_setopt(slot->curl, CURLOPT_ERRORBUFFER, obj_req->errorstr);
255 curl_easy_setopt(slot->curl, CURLOPT_URL, url);
256 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, no_pragma_header);
258 /* If we have successfully processed data from a previous fetch
259 attempt, only fetch the data we don't already have. */
260 if (prev_posn>0) {
261 if (get_verbosely)
262 fprintf(stderr,
263 "Resuming fetch of object %s at byte %ld\n",
264 hex, prev_posn);
265 sprintf(range, "Range: bytes=%ld-", prev_posn);
266 range_header = curl_slist_append(range_header, range);
267 curl_easy_setopt(slot->curl,
268 CURLOPT_HTTPHEADER, range_header);
271 /* Try to get the request started, abort the request on error */
272 obj_req->state = ACTIVE;
273 if (!start_active_slot(slot)) {
274 obj_req->state = ABORTED;
275 obj_req->slot = NULL;
276 close(obj_req->local); obj_req->local = -1;
277 free(obj_req->url);
278 return;
282 static void finish_object_request(struct object_request *obj_req)
284 struct stat st;
286 fchmod(obj_req->local, 0444);
287 close(obj_req->local); obj_req->local = -1;
289 if (obj_req->http_code == 416) {
290 fprintf(stderr, "Warning: requested range invalid; we may already have all the data.\n");
291 } else if (obj_req->curl_result != CURLE_OK) {
292 if (stat(obj_req->tmpfile, &st) == 0)
293 if (st.st_size == 0)
294 unlink(obj_req->tmpfile);
295 return;
298 inflateEnd(&obj_req->stream);
299 SHA1_Final(obj_req->real_sha1, &obj_req->c);
300 if (obj_req->zret != Z_STREAM_END) {
301 unlink(obj_req->tmpfile);
302 return;
304 if (hashcmp(obj_req->sha1, obj_req->real_sha1)) {
305 unlink(obj_req->tmpfile);
306 return;
308 obj_req->rename =
309 move_temp_to_file(obj_req->tmpfile, obj_req->filename);
311 if (obj_req->rename == 0)
312 pull_say("got %s\n", sha1_to_hex(obj_req->sha1));
315 static void process_object_response(void *callback_data)
317 struct object_request *obj_req =
318 (struct object_request *)callback_data;
320 obj_req->curl_result = obj_req->slot->curl_result;
321 obj_req->http_code = obj_req->slot->http_code;
322 obj_req->slot = NULL;
323 obj_req->state = COMPLETE;
325 /* Use alternates if necessary */
326 if (obj_req->http_code == 404 ||
327 obj_req->curl_result == CURLE_FILE_COULDNT_READ_FILE) {
328 fetch_alternates(alt->base);
329 if (obj_req->repo->next != NULL) {
330 obj_req->repo =
331 obj_req->repo->next;
332 close(obj_req->local);
333 obj_req->local = -1;
334 start_object_request(obj_req);
335 return;
339 finish_object_request(obj_req);
342 static void release_object_request(struct object_request *obj_req)
344 struct object_request *entry = object_queue_head;
346 if (obj_req->local != -1)
347 error("fd leakage in release: %d", obj_req->local);
348 if (obj_req == object_queue_head) {
349 object_queue_head = obj_req->next;
350 } else {
351 while (entry->next != NULL && entry->next != obj_req)
352 entry = entry->next;
353 if (entry->next == obj_req)
354 entry->next = entry->next->next;
357 free(obj_req->url);
358 free(obj_req);
#ifdef USE_CURL_MULTI
/*
 * Start queued object requests while request capacity remains, then
 * release the curl handles of slots that are no longer in use.
 */
void fill_active_slots(void)
{
	struct object_request *req;
	struct active_request_slot *slot;
	int num_transfers;

	for (req = object_queue_head;
	     active_requests < max_requests && req != NULL;
	     req = req->next) {
		if (req->state != WAITING)
			continue;
		/* Objects that showed up locally need no transfer. */
		if (has_sha1_file(req->sha1))
			req->state = COMPLETE;
		else
			start_object_request(req);
		curl_multi_perform(curlm, &num_transfers);
	}

	for (slot = active_queue_head; slot != NULL; slot = slot->next) {
		if (slot->in_use || slot->curl == NULL)
			continue;
		curl_easy_cleanup(slot->curl);
		slot->curl = NULL;
	}
}
#endif
389 void prefetch(unsigned char *sha1)
391 struct object_request *newreq;
392 struct object_request *tail;
393 char *filename = sha1_file_name(sha1);
395 newreq = xmalloc(sizeof(*newreq));
396 hashcpy(newreq->sha1, sha1);
397 newreq->repo = alt;
398 newreq->url = NULL;
399 newreq->local = -1;
400 newreq->state = WAITING;
401 snprintf(newreq->filename, sizeof(newreq->filename), "%s", filename);
402 snprintf(newreq->tmpfile, sizeof(newreq->tmpfile),
403 "%s.temp", filename);
404 newreq->slot = NULL;
405 newreq->next = NULL;
407 if (object_queue_head == NULL) {
408 object_queue_head = newreq;
409 } else {
410 tail = object_queue_head;
411 while (tail->next != NULL) {
412 tail = tail->next;
414 tail->next = newreq;
417 #ifdef USE_CURL_MULTI
418 fill_active_slots();
419 step_active_slots();
420 #endif
423 static int fetch_index(struct alt_base *repo, unsigned char *sha1)
425 char *hex = sha1_to_hex(sha1);
426 char *filename;
427 char *url;
428 char tmpfile[PATH_MAX];
429 long prev_posn = 0;
430 char range[RANGE_HEADER_SIZE];
431 struct curl_slist *range_header = NULL;
433 FILE *indexfile;
434 struct active_request_slot *slot;
435 struct slot_results results;
437 if (has_pack_index(sha1))
438 return 0;
440 if (get_verbosely)
441 fprintf(stderr, "Getting index for pack %s\n", hex);
443 url = xmalloc(strlen(repo->base) + 64);
444 sprintf(url, "%s/objects/pack/pack-%s.idx", repo->base, hex);
446 filename = sha1_pack_index_name(sha1);
447 snprintf(tmpfile, sizeof(tmpfile), "%s.temp", filename);
448 indexfile = fopen(tmpfile, "a");
449 if (!indexfile)
450 return error("Unable to open local file %s for pack index",
451 filename);
453 slot = get_active_slot();
454 slot->results = &results;
455 curl_easy_setopt(slot->curl, CURLOPT_FILE, indexfile);
456 curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite);
457 curl_easy_setopt(slot->curl, CURLOPT_URL, url);
458 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, no_pragma_header);
459 slot->local = indexfile;
461 /* If there is data present from a previous transfer attempt,
462 resume where it left off */
463 prev_posn = ftell(indexfile);
464 if (prev_posn>0) {
465 if (get_verbosely)
466 fprintf(stderr,
467 "Resuming fetch of index for pack %s at byte %ld\n",
468 hex, prev_posn);
469 sprintf(range, "Range: bytes=%ld-", prev_posn);
470 range_header = curl_slist_append(range_header, range);
471 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, range_header);
474 if (start_active_slot(slot)) {
475 run_active_slot(slot);
476 if (results.curl_result != CURLE_OK) {
477 fclose(indexfile);
478 return error("Unable to get pack index %s\n%s", url,
479 curl_errorstr);
481 } else {
482 fclose(indexfile);
483 return error("Unable to start request");
486 fclose(indexfile);
488 return move_temp_to_file(tmpfile, filename);
491 static int setup_index(struct alt_base *repo, unsigned char *sha1)
493 struct packed_git *new_pack;
494 if (has_pack_file(sha1))
495 return 0; /* don't list this as something we can get */
497 if (fetch_index(repo, sha1))
498 return -1;
500 new_pack = parse_pack_index(sha1);
501 new_pack->next = repo->packs;
502 repo->packs = new_pack;
503 return 0;
506 static void process_alternates_response(void *callback_data)
508 struct alternates_request *alt_req =
509 (struct alternates_request *)callback_data;
510 struct active_request_slot *slot = alt_req->slot;
511 struct alt_base *tail = alt;
512 const char *base = alt_req->base;
513 static const char null_byte = '\0';
514 char *data;
515 int i = 0;
517 if (alt_req->http_specific) {
518 if (slot->curl_result != CURLE_OK ||
519 !alt_req->buffer->posn) {
521 /* Try reusing the slot to get non-http alternates */
522 alt_req->http_specific = 0;
523 sprintf(alt_req->url, "%s/objects/info/alternates",
524 base);
525 curl_easy_setopt(slot->curl, CURLOPT_URL,
526 alt_req->url);
527 active_requests++;
528 slot->in_use = 1;
529 if (slot->finished != NULL)
530 (*slot->finished) = 0;
531 if (!start_active_slot(slot)) {
532 got_alternates = -1;
533 slot->in_use = 0;
534 if (slot->finished != NULL)
535 (*slot->finished) = 1;
537 return;
539 } else if (slot->curl_result != CURLE_OK) {
540 if (slot->http_code != 404 &&
541 slot->curl_result != CURLE_FILE_COULDNT_READ_FILE) {
542 got_alternates = -1;
543 return;
547 fwrite_buffer(&null_byte, 1, 1, alt_req->buffer);
548 alt_req->buffer->posn--;
549 data = alt_req->buffer->buffer;
551 while (i < alt_req->buffer->posn) {
552 int posn = i;
553 while (posn < alt_req->buffer->posn && data[posn] != '\n')
554 posn++;
555 if (data[posn] == '\n') {
556 int okay = 0;
557 int serverlen = 0;
558 struct alt_base *newalt;
559 char *target = NULL;
560 char *path;
561 if (data[i] == '/') {
562 /* This counts
563 * http://git.host/pub/scm/linux.git/
564 * -----------here^
565 * so memcpy(dst, base, serverlen) will
566 * copy up to "...git.host".
568 const char *colon_ss = strstr(base,"://");
569 if (colon_ss) {
570 serverlen = (strchr(colon_ss + 3, '/')
571 - base);
572 okay = 1;
574 } else if (!memcmp(data + i, "../", 3)) {
575 /* Relative URL; chop the corresponding
576 * number of subpath from base (and ../
577 * from data), and concatenate the result.
579 * The code first drops ../ from data, and
580 * then drops one ../ from data and one path
581 * from base. IOW, one extra ../ is dropped
582 * from data than path is dropped from base.
584 * This is not wrong. The alternate in
585 * http://git.host/pub/scm/linux.git/
586 * to borrow from
587 * http://git.host/pub/scm/linus.git/
588 * is ../../linus.git/objects/. You need
589 * two ../../ to borrow from your direct
590 * neighbour.
592 i += 3;
593 serverlen = strlen(base);
594 while (i + 2 < posn &&
595 !memcmp(data + i, "../", 3)) {
596 do {
597 serverlen--;
598 } while (serverlen &&
599 base[serverlen - 1] != '/');
600 i += 3;
602 /* If the server got removed, give up. */
603 okay = strchr(base, ':') - base + 3 <
604 serverlen;
605 } else if (alt_req->http_specific) {
606 char *colon = strchr(data + i, ':');
607 char *slash = strchr(data + i, '/');
608 if (colon && slash && colon < data + posn &&
609 slash < data + posn && colon < slash) {
610 okay = 1;
613 /* skip "objects\n" at end */
614 if (okay) {
615 target = xmalloc(serverlen + posn - i - 6);
616 memcpy(target, base, serverlen);
617 memcpy(target + serverlen, data + i,
618 posn - i - 7);
619 target[serverlen + posn - i - 7] = 0;
620 if (get_verbosely)
621 fprintf(stderr,
622 "Also look at %s\n", target);
623 newalt = xmalloc(sizeof(*newalt));
624 newalt->next = NULL;
625 newalt->base = target;
626 newalt->got_indices = 0;
627 newalt->packs = NULL;
628 path = strstr(target, "//");
629 if (path) {
630 path = strchr(path+2, '/');
631 if (path)
632 newalt->path_len = strlen(path);
635 while (tail->next != NULL)
636 tail = tail->next;
637 tail->next = newalt;
640 i = posn + 1;
643 got_alternates = 1;
646 static void fetch_alternates(const char *base)
648 struct buffer buffer;
649 char *url;
650 char *data;
651 struct active_request_slot *slot;
652 struct alternates_request alt_req;
654 /* If another request has already started fetching alternates,
655 wait for them to arrive and return to processing this request's
656 curl message */
657 #ifdef USE_CURL_MULTI
658 while (got_alternates == 0) {
659 step_active_slots();
661 #endif
663 /* Nothing to do if they've already been fetched */
664 if (got_alternates == 1)
665 return;
667 /* Start the fetch */
668 got_alternates = 0;
670 data = xmalloc(4096);
671 buffer.size = 4096;
672 buffer.posn = 0;
673 buffer.buffer = data;
675 if (get_verbosely)
676 fprintf(stderr, "Getting alternates list for %s\n", base);
678 url = xmalloc(strlen(base) + 31);
679 sprintf(url, "%s/objects/info/http-alternates", base);
681 /* Use a callback to process the result, since another request
682 may fail and need to have alternates loaded before continuing */
683 slot = get_active_slot();
684 slot->callback_func = process_alternates_response;
685 slot->callback_data = &alt_req;
687 curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
688 curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
689 curl_easy_setopt(slot->curl, CURLOPT_URL, url);
691 alt_req.base = base;
692 alt_req.url = url;
693 alt_req.buffer = &buffer;
694 alt_req.http_specific = 1;
695 alt_req.slot = slot;
697 if (start_active_slot(slot))
698 run_active_slot(slot);
699 else
700 got_alternates = -1;
702 free(data);
703 free(url);
706 #ifndef NO_EXPAT
707 static void
708 xml_start_tag(void *userData, const char *name, const char **atts)
710 struct xml_ctx *ctx = (struct xml_ctx *)userData;
711 const char *c = strchr(name, ':');
712 int new_len;
714 if (c == NULL)
715 c = name;
716 else
717 c++;
719 new_len = strlen(ctx->name) + strlen(c) + 2;
721 if (new_len > ctx->len) {
722 ctx->name = xrealloc(ctx->name, new_len);
723 ctx->len = new_len;
725 strcat(ctx->name, ".");
726 strcat(ctx->name, c);
728 free(ctx->cdata);
729 ctx->cdata = NULL;
731 ctx->userFunc(ctx, 0);
734 static void
735 xml_end_tag(void *userData, const char *name)
737 struct xml_ctx *ctx = (struct xml_ctx *)userData;
738 const char *c = strchr(name, ':');
739 char *ep;
741 ctx->userFunc(ctx, 1);
743 if (c == NULL)
744 c = name;
745 else
746 c++;
748 ep = ctx->name + strlen(ctx->name) - strlen(c) - 1;
749 *ep = 0;
752 static void
753 xml_cdata(void *userData, const XML_Char *s, int len)
755 struct xml_ctx *ctx = (struct xml_ctx *)userData;
756 free(ctx->cdata);
757 ctx->cdata = xmalloc(len + 1);
758 strlcpy(ctx->cdata, s, len + 1);
761 static int remote_ls(struct alt_base *repo, const char *path, int flags,
762 void (*userFunc)(struct remote_ls_ctx *ls),
763 void *userData);
765 static void handle_remote_ls_ctx(struct xml_ctx *ctx, int tag_closed)
767 struct remote_ls_ctx *ls = (struct remote_ls_ctx *)ctx->userData;
769 if (tag_closed) {
770 if (!strcmp(ctx->name, DAV_PROPFIND_RESP) && ls->dentry_name) {
771 if (ls->dentry_flags & IS_DIR) {
772 if (ls->flags & PROCESS_DIRS) {
773 ls->userFunc(ls);
775 if (strcmp(ls->dentry_name, ls->path) &&
776 ls->flags & RECURSIVE) {
777 ls->rc = remote_ls(ls->repo,
778 ls->dentry_name,
779 ls->flags,
780 ls->userFunc,
781 ls->userData);
783 } else if (ls->flags & PROCESS_FILES) {
784 ls->userFunc(ls);
786 } else if (!strcmp(ctx->name, DAV_PROPFIND_NAME) && ctx->cdata) {
787 ls->dentry_name = xmalloc(strlen(ctx->cdata) -
788 ls->repo->path_len + 1);
789 strcpy(ls->dentry_name, ctx->cdata + ls->repo->path_len);
790 } else if (!strcmp(ctx->name, DAV_PROPFIND_COLLECTION)) {
791 ls->dentry_flags |= IS_DIR;
793 } else if (!strcmp(ctx->name, DAV_PROPFIND_RESP)) {
794 free(ls->dentry_name);
795 ls->dentry_name = NULL;
796 ls->dentry_flags = 0;
800 static int remote_ls(struct alt_base *repo, const char *path, int flags,
801 void (*userFunc)(struct remote_ls_ctx *ls),
802 void *userData)
804 char *url = xmalloc(strlen(repo->base) + strlen(path) + 1);
805 struct active_request_slot *slot;
806 struct slot_results results;
807 struct buffer in_buffer;
808 struct buffer out_buffer;
809 char *in_data;
810 char *out_data;
811 XML_Parser parser = XML_ParserCreate(NULL);
812 enum XML_Status result;
813 struct curl_slist *dav_headers = NULL;
814 struct xml_ctx ctx;
815 struct remote_ls_ctx ls;
817 ls.flags = flags;
818 ls.repo = repo;
819 ls.path = xstrdup(path);
820 ls.dentry_name = NULL;
821 ls.dentry_flags = 0;
822 ls.userData = userData;
823 ls.userFunc = userFunc;
824 ls.rc = 0;
826 sprintf(url, "%s%s", repo->base, path);
828 out_buffer.size = strlen(PROPFIND_ALL_REQUEST);
829 out_data = xmalloc(out_buffer.size + 1);
830 snprintf(out_data, out_buffer.size + 1, PROPFIND_ALL_REQUEST);
831 out_buffer.posn = 0;
832 out_buffer.buffer = out_data;
834 in_buffer.size = 4096;
835 in_data = xmalloc(in_buffer.size);
836 in_buffer.posn = 0;
837 in_buffer.buffer = in_data;
839 dav_headers = curl_slist_append(dav_headers, "Depth: 1");
840 dav_headers = curl_slist_append(dav_headers, "Content-Type: text/xml");
842 slot = get_active_slot();
843 slot->results = &results;
844 curl_easy_setopt(slot->curl, CURLOPT_INFILE, &out_buffer);
845 curl_easy_setopt(slot->curl, CURLOPT_INFILESIZE, out_buffer.size);
846 curl_easy_setopt(slot->curl, CURLOPT_READFUNCTION, fread_buffer);
847 curl_easy_setopt(slot->curl, CURLOPT_FILE, &in_buffer);
848 curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
849 curl_easy_setopt(slot->curl, CURLOPT_URL, url);
850 curl_easy_setopt(slot->curl, CURLOPT_UPLOAD, 1);
851 curl_easy_setopt(slot->curl, CURLOPT_CUSTOMREQUEST, DAV_PROPFIND);
852 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, dav_headers);
854 if (start_active_slot(slot)) {
855 run_active_slot(slot);
856 if (results.curl_result == CURLE_OK) {
857 ctx.name = xcalloc(10, 1);
858 ctx.len = 0;
859 ctx.cdata = NULL;
860 ctx.userFunc = handle_remote_ls_ctx;
861 ctx.userData = &ls;
862 XML_SetUserData(parser, &ctx);
863 XML_SetElementHandler(parser, xml_start_tag,
864 xml_end_tag);
865 XML_SetCharacterDataHandler(parser, xml_cdata);
866 result = XML_Parse(parser, in_buffer.buffer,
867 in_buffer.posn, 1);
868 free(ctx.name);
870 if (result != XML_STATUS_OK) {
871 ls.rc = error("XML error: %s",
872 XML_ErrorString(
873 XML_GetErrorCode(parser)));
875 } else {
876 ls.rc = -1;
878 } else {
879 ls.rc = error("Unable to start PROPFIND request");
882 free(ls.path);
883 free(url);
884 free(out_data);
885 free(in_buffer.buffer);
886 curl_slist_free_all(dav_headers);
888 return ls.rc;
891 static void process_ls_pack(struct remote_ls_ctx *ls)
893 unsigned char sha1[20];
895 if (strlen(ls->dentry_name) == 63 &&
896 !strncmp(ls->dentry_name, "objects/pack/pack-", 18) &&
897 has_extension(ls->dentry_name, ".pack")) {
898 get_sha1_hex(ls->dentry_name + 18, sha1);
899 setup_index(ls->repo, sha1);
902 #endif
904 static int fetch_indices(struct alt_base *repo)
906 unsigned char sha1[20];
907 char *url;
908 struct buffer buffer;
909 char *data;
910 int i = 0;
912 struct active_request_slot *slot;
913 struct slot_results results;
915 if (repo->got_indices)
916 return 0;
918 data = xmalloc(4096);
919 buffer.size = 4096;
920 buffer.posn = 0;
921 buffer.buffer = data;
923 if (get_verbosely)
924 fprintf(stderr, "Getting pack list for %s\n", repo->base);
926 #ifndef NO_EXPAT
927 if (remote_ls(repo, "objects/pack/", PROCESS_FILES,
928 process_ls_pack, NULL) == 0)
929 return 0;
930 #endif
932 url = xmalloc(strlen(repo->base) + 21);
933 sprintf(url, "%s/objects/info/packs", repo->base);
935 slot = get_active_slot();
936 slot->results = &results;
937 curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
938 curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
939 curl_easy_setopt(slot->curl, CURLOPT_URL, url);
940 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, NULL);
941 if (start_active_slot(slot)) {
942 run_active_slot(slot);
943 if (results.curl_result != CURLE_OK) {
944 if (results.http_code == 404 ||
945 results.curl_result == CURLE_FILE_COULDNT_READ_FILE) {
946 repo->got_indices = 1;
947 free(buffer.buffer);
948 return 0;
949 } else {
950 repo->got_indices = 0;
951 free(buffer.buffer);
952 return error("%s", curl_errorstr);
955 } else {
956 repo->got_indices = 0;
957 free(buffer.buffer);
958 return error("Unable to start request");
961 data = buffer.buffer;
962 while (i < buffer.posn) {
963 switch (data[i]) {
964 case 'P':
965 i++;
966 if (i + 52 <= buffer.posn &&
967 !strncmp(data + i, " pack-", 6) &&
968 !strncmp(data + i + 46, ".pack\n", 6)) {
969 get_sha1_hex(data + i + 6, sha1);
970 setup_index(repo, sha1);
971 i += 51;
972 break;
974 default:
975 while (i < buffer.posn && data[i] != '\n')
976 i++;
978 i++;
981 free(buffer.buffer);
982 repo->got_indices = 1;
983 return 0;
986 static int fetch_pack(struct alt_base *repo, unsigned char *sha1)
988 char *url;
989 struct packed_git *target;
990 struct packed_git **lst;
991 FILE *packfile;
992 char *filename;
993 char tmpfile[PATH_MAX];
994 int ret;
995 long prev_posn = 0;
996 char range[RANGE_HEADER_SIZE];
997 struct curl_slist *range_header = NULL;
999 struct active_request_slot *slot;
1000 struct slot_results results;
1002 if (fetch_indices(repo))
1003 return -1;
1004 target = find_sha1_pack(sha1, repo->packs);
1005 if (!target)
1006 return -1;
1008 if (get_verbosely) {
1009 fprintf(stderr, "Getting pack %s\n",
1010 sha1_to_hex(target->sha1));
1011 fprintf(stderr, " which contains %s\n",
1012 sha1_to_hex(sha1));
1015 url = xmalloc(strlen(repo->base) + 65);
1016 sprintf(url, "%s/objects/pack/pack-%s.pack",
1017 repo->base, sha1_to_hex(target->sha1));
1019 filename = sha1_pack_name(target->sha1);
1020 snprintf(tmpfile, sizeof(tmpfile), "%s.temp", filename);
1021 packfile = fopen(tmpfile, "a");
1022 if (!packfile)
1023 return error("Unable to open local file %s for pack",
1024 filename);
1026 slot = get_active_slot();
1027 slot->results = &results;
1028 curl_easy_setopt(slot->curl, CURLOPT_FILE, packfile);
1029 curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite);
1030 curl_easy_setopt(slot->curl, CURLOPT_URL, url);
1031 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, no_pragma_header);
1032 slot->local = packfile;
1034 /* If there is data present from a previous transfer attempt,
1035 resume where it left off */
1036 prev_posn = ftell(packfile);
1037 if (prev_posn>0) {
1038 if (get_verbosely)
1039 fprintf(stderr,
1040 "Resuming fetch of pack %s at byte %ld\n",
1041 sha1_to_hex(target->sha1), prev_posn);
1042 sprintf(range, "Range: bytes=%ld-", prev_posn);
1043 range_header = curl_slist_append(range_header, range);
1044 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, range_header);
1047 if (start_active_slot(slot)) {
1048 run_active_slot(slot);
1049 if (results.curl_result != CURLE_OK) {
1050 fclose(packfile);
1051 return error("Unable to get pack file %s\n%s", url,
1052 curl_errorstr);
1054 } else {
1055 fclose(packfile);
1056 return error("Unable to start request");
1059 fclose(packfile);
1061 ret = move_temp_to_file(tmpfile, filename);
1062 if (ret)
1063 return ret;
1065 lst = &repo->packs;
1066 while (*lst != target)
1067 lst = &((*lst)->next);
1068 *lst = (*lst)->next;
1070 if (verify_pack(target, 0))
1071 return -1;
1072 install_packed_git(target);
1074 return 0;
1077 static void abort_object_request(struct object_request *obj_req)
1079 if (obj_req->local >= 0) {
1080 close(obj_req->local);
1081 obj_req->local = -1;
1083 unlink(obj_req->tmpfile);
1084 if (obj_req->slot) {
1085 release_active_slot(obj_req->slot);
1086 obj_req->slot = NULL;
1088 release_object_request(obj_req);
1091 static int fetch_object(struct alt_base *repo, unsigned char *sha1)
1093 char *hex = sha1_to_hex(sha1);
1094 int ret = 0;
1095 struct object_request *obj_req = object_queue_head;
1097 while (obj_req != NULL && hashcmp(obj_req->sha1, sha1))
1098 obj_req = obj_req->next;
1099 if (obj_req == NULL)
1100 return error("Couldn't find request for %s in the queue", hex);
1102 if (has_sha1_file(obj_req->sha1)) {
1103 abort_object_request(obj_req);
1104 return 0;
1107 #ifdef USE_CURL_MULTI
1108 while (obj_req->state == WAITING) {
1109 step_active_slots();
1111 #else
1112 start_object_request(obj_req);
1113 #endif
1115 while (obj_req->state == ACTIVE) {
1116 run_active_slot(obj_req->slot);
1118 if (obj_req->local != -1) {
1119 close(obj_req->local); obj_req->local = -1;
1122 if (obj_req->state == ABORTED) {
1123 ret = error("Request for %s aborted", hex);
1124 } else if (obj_req->curl_result != CURLE_OK &&
1125 obj_req->http_code != 416) {
1126 if (obj_req->http_code == 404 ||
1127 obj_req->curl_result == CURLE_FILE_COULDNT_READ_FILE)
1128 ret = -1; /* Be silent, it is probably in a pack. */
1129 else
1130 ret = error("%s (curl_result = %d, http_code = %ld, sha1 = %s)",
1131 obj_req->errorstr, obj_req->curl_result,
1132 obj_req->http_code, hex);
1133 } else if (obj_req->zret != Z_STREAM_END) {
1134 corrupt_object_found++;
1135 ret = error("File %s (%s) corrupt", hex, obj_req->url);
1136 } else if (hashcmp(obj_req->sha1, obj_req->real_sha1)) {
1137 ret = error("File %s has bad hash", hex);
1138 } else if (obj_req->rename < 0) {
1139 ret = error("unable to write sha1 filename %s",
1140 obj_req->filename);
1143 release_object_request(obj_req);
1144 return ret;
1147 int fetch(unsigned char *sha1)
1149 struct alt_base *altbase = alt;
1151 if (!fetch_object(altbase, sha1))
1152 return 0;
1153 while (altbase) {
1154 if (!fetch_pack(altbase, sha1))
1155 return 0;
1156 fetch_alternates(alt->base);
1157 altbase = altbase->next;
1159 return error("Unable to find %s under %s", sha1_to_hex(sha1),
1160 alt->base);
/*
 * Tell whether ch must be percent-encoded in a ref URL.
 * ASCII letters, digits, '/', '-' and '.' pass through unchanged;
 * everything else needs quoting.  Returns 1 to quote, 0 otherwise.
 */
static inline int needs_quote(int ch)
{
	int is_upper = (ch >= 'A') && (ch <= 'Z');
	int is_lower = (ch >= 'a') && (ch <= 'z');
	int is_digit = (ch >= '0') && (ch <= '9');

	switch (ch) {
	case '/':
	case '-':
	case '.':
		return 0;
	default:
		break;
	}
	return !(is_upper || is_lower || is_digit);
}
/* Map a nibble value (0..15) to its upper-case hexadecimal digit. */
static inline int hex(int v)
{
	return (v < 10) ? ('0' + v) : ('A' + v - 10);
}
/*
 * Build the URL "<base>refs/<ref>", percent-encoding every character of
 * ref for which needs_quote() is true.
 *
 * Returns a newly allocated string that the caller must free.
 */
static char *quote_ref_url(const char *base, const char *ref)
{
	const char *src;
	char *out, *qref;
	int total, baselen, ch;

	baselen = strlen(base);

	/* First pass: size of the result. */
	total = baselen + 6; /* "refs/" + NUL */
	src = ref;
	while ((ch = *src) != 0) {
		/* a quoted character expands to '%' plus two hex digits */
		total += needs_quote(ch) ? 3 : 1;
		src++;
	}

	qref = xmalloc(total);
	memcpy(qref, base, baselen);
	memcpy(qref + baselen, "refs/", 5);

	/* Second pass: copy, quoting where required. */
	out = qref + baselen + 5;
	src = ref;
	while ((ch = *src) != 0) {
		if (!needs_quote(ch)) {
			*out++ = ch;
		} else {
			*out++ = '%';
			*out++ = hex((ch >> 4) & 0xF);
			*out++ = hex(ch & 0xF);
		}
		src++;
	}
	*out = 0;

	return qref;
}
1209 int fetch_ref(char *ref, unsigned char *sha1)
1211 char *url;
1212 char hex[42];
1213 struct buffer buffer;
1214 const char *base = alt->base;
1215 struct active_request_slot *slot;
1216 struct slot_results results;
1217 buffer.size = 41;
1218 buffer.posn = 0;
1219 buffer.buffer = hex;
1220 hex[41] = '\0';
1222 url = quote_ref_url(base, ref);
1223 slot = get_active_slot();
1224 slot->results = &results;
1225 curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
1226 curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
1227 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, NULL);
1228 curl_easy_setopt(slot->curl, CURLOPT_URL, url);
1229 if (start_active_slot(slot)) {
1230 run_active_slot(slot);
1231 if (results.curl_result != CURLE_OK)
1232 return error("Couldn't get %s for %s\n%s",
1233 url, ref, curl_errorstr);
1234 } else {
1235 return error("Unable to start request");
1238 hex[40] = '\0';
1239 get_sha1_hex(hex, sha1);
1240 return 0;
1243 int main(int argc, const char **argv)
1245 int commits;
1246 const char **write_ref = NULL;
1247 char **commit_id;
1248 const char *url;
1249 char *path;
1250 int arg = 1;
1251 int rc = 0;
1253 setup_ident();
1254 setup_git_directory();
1255 git_config(git_default_config);
1257 while (arg < argc && argv[arg][0] == '-') {
1258 if (argv[arg][1] == 't') {
1259 get_tree = 1;
1260 } else if (argv[arg][1] == 'c') {
1261 get_history = 1;
1262 } else if (argv[arg][1] == 'a') {
1263 get_all = 1;
1264 get_tree = 1;
1265 get_history = 1;
1266 } else if (argv[arg][1] == 'v') {
1267 get_verbosely = 1;
1268 } else if (argv[arg][1] == 'w') {
1269 write_ref = &argv[arg + 1];
1270 arg++;
1271 } else if (!strcmp(argv[arg], "--recover")) {
1272 get_recover = 1;
1273 } else if (!strcmp(argv[arg], "--stdin")) {
1274 commits_on_stdin = 1;
1276 arg++;
1278 if (argc < arg + 2 - commits_on_stdin) {
1279 usage("git-http-fetch [-c] [-t] [-a] [-v] [--recover] [-w ref] [--stdin] commit-id url");
1280 return 1;
1282 if (commits_on_stdin) {
1283 commits = pull_targets_stdin(&commit_id, &write_ref);
1284 } else {
1285 commit_id = (char **) &argv[arg++];
1286 commits = 1;
1288 url = argv[arg];
1290 http_init();
1292 no_pragma_header = curl_slist_append(no_pragma_header, "Pragma:");
1294 alt = xmalloc(sizeof(*alt));
1295 alt->base = url;
1296 alt->got_indices = 0;
1297 alt->packs = NULL;
1298 alt->next = NULL;
1299 path = strstr(url, "//");
1300 if (path) {
1301 path = strchr(path+2, '/');
1302 if (path)
1303 alt->path_len = strlen(path);
1306 if (pull(commits, commit_id, write_ref, url))
1307 rc = 1;
1309 http_cleanup();
1311 curl_slist_free_all(no_pragma_header);
1313 if (commits_on_stdin)
1314 pull_targets_free(commits, commit_id, write_ref);
1316 if (corrupt_object_found) {
1317 fprintf(stderr,
1318 "Some loose object were found to be corrupt, but they might be just\n"
1319 "a false '404 Not Found' error message sent with incorrect HTTP\n"
1320 "status code. Suggest running git fsck-objects.\n");
1322 return rc;