/*
 * Gitweb page header (kept for provenance):
 *   fix pack-object buffer size
 *   [git/fastimport.git] / http-fetch.c
 *   blob 861644b27ee0c01a780b68daa43c259dd978a538
 */
1 #include "cache.h"
2 #include "commit.h"
3 #include "pack.h"
4 #include "fetch.h"
5 #include "http.h"
7 #ifndef NO_EXPAT
8 #include <expat.h>
10 /* Definitions for DAV requests */
11 #define DAV_PROPFIND "PROPFIND"
12 #define DAV_PROPFIND_RESP ".multistatus.response"
13 #define DAV_PROPFIND_NAME ".multistatus.response.href"
14 #define DAV_PROPFIND_COLLECTION ".multistatus.response.propstat.prop.resourcetype.collection"
15 #define PROPFIND_ALL_REQUEST "<?xml version=\"1.0\" encoding=\"utf-8\" ?>\n<D:propfind xmlns:D=\"DAV:\">\n<D:allprop/>\n</D:propfind>"
17 /* Definitions for processing XML DAV responses */
18 #ifndef XML_STATUS_OK
19 enum XML_Status {
20 XML_STATUS_OK = 1,
21 XML_STATUS_ERROR = 0
23 #define XML_STATUS_OK 1
24 #define XML_STATUS_ERROR 0
25 #endif
27 /* Flags that control remote_ls processing */
28 #define PROCESS_FILES (1u << 0)
29 #define PROCESS_DIRS (1u << 1)
30 #define RECURSIVE (1u << 2)
32 /* Flags that remote_ls passes to callback functions */
33 #define IS_DIR (1u << 0)
34 #endif
/* Chunk size used when replaying a partially downloaded object file. */
#define PREV_BUF_SIZE 4096
/* Size of the stack buffers that hold a formatted "Range: bytes=N-" header. */
#define RANGE_HEADER_SIZE 30

/*
 * Alternates state: -1 = not fetched yet (or the fetch failed),
 * 0 = fetch in progress, 1 = alternates list has been processed.
 */
static int got_alternates = -1;
/* Incremented by fetch_object() when an object's zlib stream did not end cleanly. */
static int corrupt_object_found = 0;

/* Header list holding an empty "Pragma:" header; built in main(). */
static struct curl_slist *no_pragma_header;
/* One repository (the primary URL or an alternate) that objects may be fetched from. */
struct alt_base
{
	char *base;			/* base URL of the repository */
	int path_len;			/* length of the path part of base (after "scheme://host") */
	int got_indices;		/* non-zero once the pack list has been processed */
	struct packed_git *packs;	/* packs known to exist remotely but not yet fetched */
	struct alt_base *next;		/* next alternate, or NULL */
};

/* Head of the repository list; the first entry is the URL given on the command line. */
static struct alt_base *alt = NULL;
/* Lifecycle of a loose-object request. */
enum object_request_state {
	WAITING,	/* queued by prefetch(), transfer not started */
	ABORTED,	/* could not be started (see start_object_request()) */
	ACTIVE,		/* transfer handed to an active slot */
	COMPLETE,	/* transfer finished, or the object is already present locally */
};
62 struct object_request
64 unsigned char sha1[20];
65 struct alt_base *repo;
66 char *url;
67 char filename[PATH_MAX];
68 char tmpfile[PATH_MAX];
69 int local;
70 enum object_request_state state;
71 CURLcode curl_result;
72 char errorstr[CURL_ERROR_SIZE];
73 long http_code;
74 unsigned char real_sha1[20];
75 SHA_CTX c;
76 z_stream stream;
77 int zret;
78 int rename;
79 struct active_request_slot *slot;
80 struct object_request *next;
/* State shared between fetch_alternates() and process_alternates_response(). */
struct alternates_request {
	char *base;			/* base URL whose alternates are being fetched */
	char *url;			/* request URL; rewritten in place for the fallback request */
	struct buffer *buffer;		/* receives the response body */
	struct active_request_slot *slot;
	int http_specific;		/* 1 while requesting http-alternates, 0 for plain alternates */
};
#ifndef NO_EXPAT
/* State shared by the expat callbacks (xml_start_tag, xml_end_tag, xml_cdata). */
struct xml_ctx
{
	char *name;	/* dotted path of the currently open tags, e.g. ".multistatus.response" */
	int len;	/* allocation size of name as tracked by xml_start_tag() */
	char *cdata;	/* most recent character data, or NULL */
	void (*userFunc)(struct xml_ctx *ctx, int tag_closed);	/* called on every tag open/close */
	void *userData;	/* opaque pointer for userFunc */
};

/* State of one remote_ls() directory listing. */
struct remote_ls_ctx
{
	struct alt_base *repo;		/* repository being listed */
	char *path;			/* path (relative to repo->base) being listed */
	void (*userFunc)(struct remote_ls_ctx *ls);	/* called for each matching entry */
	void *userData;
	int flags;			/* PROCESS_FILES / PROCESS_DIRS / RECURSIVE */
	char *dentry_name;		/* name of the entry currently being parsed, or NULL */
	int dentry_flags;		/* IS_DIR when the entry is a collection */
	int rc;				/* result returned by remote_ls() */
	struct remote_ls_ctx *parent;
};
#endif
/* Head of the singly linked queue of object requests; prefetch() appends at the tail. */
static struct object_request *object_queue_head = NULL;
117 static size_t fwrite_sha1_file(void *ptr, size_t eltsize, size_t nmemb,
118 void *data)
120 unsigned char expn[4096];
121 size_t size = eltsize * nmemb;
122 int posn = 0;
123 struct object_request *obj_req = (struct object_request *)data;
124 do {
125 ssize_t retval = write(obj_req->local,
126 ptr + posn, size - posn);
127 if (retval < 0)
128 return posn;
129 posn += retval;
130 } while (posn < size);
132 obj_req->stream.avail_in = size;
133 obj_req->stream.next_in = ptr;
134 do {
135 obj_req->stream.next_out = expn;
136 obj_req->stream.avail_out = sizeof(expn);
137 obj_req->zret = inflate(&obj_req->stream, Z_SYNC_FLUSH);
138 SHA1_Update(&obj_req->c, expn,
139 sizeof(expn) - obj_req->stream.avail_out);
140 } while (obj_req->stream.avail_in && obj_req->zret == Z_OK);
141 data_received++;
142 return size;
/* Forward declarations: both are used before their definitions. */
static void fetch_alternates(char *base);

static void process_object_response(void *callback_data);
149 static void start_object_request(struct object_request *obj_req)
151 char *hex = sha1_to_hex(obj_req->sha1);
152 char prevfile[PATH_MAX];
153 char *url;
154 char *posn;
155 int prevlocal;
156 unsigned char prev_buf[PREV_BUF_SIZE];
157 ssize_t prev_read = 0;
158 long prev_posn = 0;
159 char range[RANGE_HEADER_SIZE];
160 struct curl_slist *range_header = NULL;
161 struct active_request_slot *slot;
163 snprintf(prevfile, sizeof(prevfile), "%s.prev", obj_req->filename);
164 unlink(prevfile);
165 rename(obj_req->tmpfile, prevfile);
166 unlink(obj_req->tmpfile);
168 if (obj_req->local != -1)
169 error("fd leakage in start: %d", obj_req->local);
170 obj_req->local = open(obj_req->tmpfile,
171 O_WRONLY | O_CREAT | O_EXCL, 0666);
172 /* This could have failed due to the "lazy directory creation";
173 * try to mkdir the last path component.
175 if (obj_req->local < 0 && errno == ENOENT) {
176 char *dir = strrchr(obj_req->tmpfile, '/');
177 if (dir) {
178 *dir = 0;
179 mkdir(obj_req->tmpfile, 0777);
180 *dir = '/';
182 obj_req->local = open(obj_req->tmpfile,
183 O_WRONLY | O_CREAT | O_EXCL, 0666);
186 if (obj_req->local < 0) {
187 obj_req->state = ABORTED;
188 error("Couldn't create temporary file %s for %s: %s",
189 obj_req->tmpfile, obj_req->filename, strerror(errno));
190 return;
193 memset(&obj_req->stream, 0, sizeof(obj_req->stream));
195 inflateInit(&obj_req->stream);
197 SHA1_Init(&obj_req->c);
199 url = xmalloc(strlen(obj_req->repo->base) + 50);
200 obj_req->url = xmalloc(strlen(obj_req->repo->base) + 50);
201 strcpy(url, obj_req->repo->base);
202 posn = url + strlen(obj_req->repo->base);
203 strcpy(posn, "objects/");
204 posn += 8;
205 memcpy(posn, hex, 2);
206 posn += 2;
207 *(posn++) = '/';
208 strcpy(posn, hex + 2);
209 strcpy(obj_req->url, url);
211 /* If a previous temp file is present, process what was already
212 fetched. */
213 prevlocal = open(prevfile, O_RDONLY);
214 if (prevlocal != -1) {
215 do {
216 prev_read = read(prevlocal, prev_buf, PREV_BUF_SIZE);
217 if (prev_read>0) {
218 if (fwrite_sha1_file(prev_buf,
220 prev_read,
221 obj_req) == prev_read) {
222 prev_posn += prev_read;
223 } else {
224 prev_read = -1;
227 } while (prev_read > 0);
228 close(prevlocal);
230 unlink(prevfile);
232 /* Reset inflate/SHA1 if there was an error reading the previous temp
233 file; also rewind to the beginning of the local file. */
234 if (prev_read == -1) {
235 memset(&obj_req->stream, 0, sizeof(obj_req->stream));
236 inflateInit(&obj_req->stream);
237 SHA1_Init(&obj_req->c);
238 if (prev_posn>0) {
239 prev_posn = 0;
240 lseek(obj_req->local, SEEK_SET, 0);
241 ftruncate(obj_req->local, 0);
245 slot = get_active_slot();
246 slot->callback_func = process_object_response;
247 slot->callback_data = obj_req;
248 obj_req->slot = slot;
250 curl_easy_setopt(slot->curl, CURLOPT_FILE, obj_req);
251 curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_sha1_file);
252 curl_easy_setopt(slot->curl, CURLOPT_ERRORBUFFER, obj_req->errorstr);
253 curl_easy_setopt(slot->curl, CURLOPT_URL, url);
254 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, no_pragma_header);
256 /* If we have successfully processed data from a previous fetch
257 attempt, only fetch the data we don't already have. */
258 if (prev_posn>0) {
259 if (get_verbosely)
260 fprintf(stderr,
261 "Resuming fetch of object %s at byte %ld\n",
262 hex, prev_posn);
263 sprintf(range, "Range: bytes=%ld-", prev_posn);
264 range_header = curl_slist_append(range_header, range);
265 curl_easy_setopt(slot->curl,
266 CURLOPT_HTTPHEADER, range_header);
269 /* Try to get the request started, abort the request on error */
270 obj_req->state = ACTIVE;
271 if (!start_active_slot(slot)) {
272 obj_req->state = ABORTED;
273 obj_req->slot = NULL;
274 close(obj_req->local); obj_req->local = -1;
275 free(obj_req->url);
276 return;
280 static void finish_object_request(struct object_request *obj_req)
282 struct stat st;
284 fchmod(obj_req->local, 0444);
285 close(obj_req->local); obj_req->local = -1;
287 if (obj_req->http_code == 416) {
288 fprintf(stderr, "Warning: requested range invalid; we may already have all the data.\n");
289 } else if (obj_req->curl_result != CURLE_OK) {
290 if (stat(obj_req->tmpfile, &st) == 0)
291 if (st.st_size == 0)
292 unlink(obj_req->tmpfile);
293 return;
296 inflateEnd(&obj_req->stream);
297 SHA1_Final(obj_req->real_sha1, &obj_req->c);
298 if (obj_req->zret != Z_STREAM_END) {
299 unlink(obj_req->tmpfile);
300 return;
302 if (memcmp(obj_req->sha1, obj_req->real_sha1, 20)) {
303 unlink(obj_req->tmpfile);
304 return;
306 obj_req->rename =
307 move_temp_to_file(obj_req->tmpfile, obj_req->filename);
309 if (obj_req->rename == 0)
310 pull_say("got %s\n", sha1_to_hex(obj_req->sha1));
313 static void process_object_response(void *callback_data)
315 struct object_request *obj_req =
316 (struct object_request *)callback_data;
318 obj_req->curl_result = obj_req->slot->curl_result;
319 obj_req->http_code = obj_req->slot->http_code;
320 obj_req->slot = NULL;
321 obj_req->state = COMPLETE;
323 /* Use alternates if necessary */
324 if (obj_req->http_code == 404 ||
325 obj_req->curl_result == CURLE_FILE_COULDNT_READ_FILE) {
326 fetch_alternates(alt->base);
327 if (obj_req->repo->next != NULL) {
328 obj_req->repo =
329 obj_req->repo->next;
330 close(obj_req->local);
331 obj_req->local = -1;
332 start_object_request(obj_req);
333 return;
337 finish_object_request(obj_req);
340 static void release_object_request(struct object_request *obj_req)
342 struct object_request *entry = object_queue_head;
344 if (obj_req->local != -1)
345 error("fd leakage in release: %d", obj_req->local);
346 if (obj_req == object_queue_head) {
347 object_queue_head = obj_req->next;
348 } else {
349 while (entry->next != NULL && entry->next != obj_req)
350 entry = entry->next;
351 if (entry->next == obj_req)
352 entry->next = entry->next->next;
355 free(obj_req->url);
356 free(obj_req);
#ifdef USE_CURL_MULTI
/*
 * Start queued object requests while fewer than max_requests transfers
 * are active, then clean up the curl handles of slots that are not in use.
 */
void fill_active_slots(void)
{
	struct object_request *obj_req = object_queue_head;
	struct active_request_slot *slot = active_queue_head;
	int num_transfers;

	while (active_requests < max_requests && obj_req != NULL) {
		if (obj_req->state == WAITING) {
			/* Objects that are already present need no transfer. */
			if (has_sha1_file(obj_req->sha1))
				obj_req->state = COMPLETE;
			else
				start_object_request(obj_req);
			curl_multi_perform(curlm, &num_transfers);
		}
		obj_req = obj_req->next;
	}

	while (slot != NULL) {
		if (!slot->in_use && slot->curl != NULL) {
			curl_easy_cleanup(slot->curl);
			slot->curl = NULL;
		}
		slot = slot->next;
	}
}
#endif
387 void prefetch(unsigned char *sha1)
389 struct object_request *newreq;
390 struct object_request *tail;
391 char *filename = sha1_file_name(sha1);
393 newreq = xmalloc(sizeof(*newreq));
394 memcpy(newreq->sha1, sha1, 20);
395 newreq->repo = alt;
396 newreq->url = NULL;
397 newreq->local = -1;
398 newreq->state = WAITING;
399 snprintf(newreq->filename, sizeof(newreq->filename), "%s", filename);
400 snprintf(newreq->tmpfile, sizeof(newreq->tmpfile),
401 "%s.temp", filename);
402 newreq->next = NULL;
404 if (object_queue_head == NULL) {
405 object_queue_head = newreq;
406 } else {
407 tail = object_queue_head;
408 while (tail->next != NULL) {
409 tail = tail->next;
411 tail->next = newreq;
414 #ifdef USE_CURL_MULTI
415 fill_active_slots();
416 step_active_slots();
417 #endif
420 static int fetch_index(struct alt_base *repo, unsigned char *sha1)
422 char *hex = sha1_to_hex(sha1);
423 char *filename;
424 char *url;
425 char tmpfile[PATH_MAX];
426 long prev_posn = 0;
427 char range[RANGE_HEADER_SIZE];
428 struct curl_slist *range_header = NULL;
430 FILE *indexfile;
431 struct active_request_slot *slot;
432 struct slot_results results;
434 if (has_pack_index(sha1))
435 return 0;
437 if (get_verbosely)
438 fprintf(stderr, "Getting index for pack %s\n", hex);
440 url = xmalloc(strlen(repo->base) + 64);
441 sprintf(url, "%s/objects/pack/pack-%s.idx", repo->base, hex);
443 filename = sha1_pack_index_name(sha1);
444 snprintf(tmpfile, sizeof(tmpfile), "%s.temp", filename);
445 indexfile = fopen(tmpfile, "a");
446 if (!indexfile)
447 return error("Unable to open local file %s for pack index",
448 filename);
450 slot = get_active_slot();
451 slot->results = &results;
452 curl_easy_setopt(slot->curl, CURLOPT_FILE, indexfile);
453 curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite);
454 curl_easy_setopt(slot->curl, CURLOPT_URL, url);
455 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, no_pragma_header);
456 slot->local = indexfile;
458 /* If there is data present from a previous transfer attempt,
459 resume where it left off */
460 prev_posn = ftell(indexfile);
461 if (prev_posn>0) {
462 if (get_verbosely)
463 fprintf(stderr,
464 "Resuming fetch of index for pack %s at byte %ld\n",
465 hex, prev_posn);
466 sprintf(range, "Range: bytes=%ld-", prev_posn);
467 range_header = curl_slist_append(range_header, range);
468 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, range_header);
471 if (start_active_slot(slot)) {
472 run_active_slot(slot);
473 if (results.curl_result != CURLE_OK) {
474 fclose(indexfile);
475 return error("Unable to get pack index %s\n%s", url,
476 curl_errorstr);
478 } else {
479 fclose(indexfile);
480 return error("Unable to start request");
483 fclose(indexfile);
485 return move_temp_to_file(tmpfile, filename);
488 static int setup_index(struct alt_base *repo, unsigned char *sha1)
490 struct packed_git *new_pack;
491 if (has_pack_file(sha1))
492 return 0; // don't list this as something we can get
494 if (fetch_index(repo, sha1))
495 return -1;
497 new_pack = parse_pack_index(sha1);
498 new_pack->next = repo->packs;
499 repo->packs = new_pack;
500 return 0;
503 static void process_alternates_response(void *callback_data)
505 struct alternates_request *alt_req =
506 (struct alternates_request *)callback_data;
507 struct active_request_slot *slot = alt_req->slot;
508 struct alt_base *tail = alt;
509 char *base = alt_req->base;
510 static const char null_byte = '\0';
511 char *data;
512 int i = 0;
514 if (alt_req->http_specific) {
515 if (slot->curl_result != CURLE_OK ||
516 !alt_req->buffer->posn) {
518 /* Try reusing the slot to get non-http alternates */
519 alt_req->http_specific = 0;
520 sprintf(alt_req->url, "%s/objects/info/alternates",
521 base);
522 curl_easy_setopt(slot->curl, CURLOPT_URL,
523 alt_req->url);
524 active_requests++;
525 slot->in_use = 1;
526 if (slot->finished != NULL)
527 (*slot->finished) = 0;
528 if (!start_active_slot(slot)) {
529 got_alternates = -1;
530 slot->in_use = 0;
531 if (slot->finished != NULL)
532 (*slot->finished) = 1;
534 return;
536 } else if (slot->curl_result != CURLE_OK) {
537 if (slot->http_code != 404 &&
538 slot->curl_result != CURLE_FILE_COULDNT_READ_FILE) {
539 got_alternates = -1;
540 return;
544 fwrite_buffer(&null_byte, 1, 1, alt_req->buffer);
545 alt_req->buffer->posn--;
546 data = alt_req->buffer->buffer;
548 while (i < alt_req->buffer->posn) {
549 int posn = i;
550 while (posn < alt_req->buffer->posn && data[posn] != '\n')
551 posn++;
552 if (data[posn] == '\n') {
553 int okay = 0;
554 int serverlen = 0;
555 struct alt_base *newalt;
556 char *target = NULL;
557 char *path;
558 if (data[i] == '/') {
559 serverlen = strchr(base + 8, '/') - base;
560 okay = 1;
561 } else if (!memcmp(data + i, "../", 3)) {
562 i += 3;
563 serverlen = strlen(base);
564 while (i + 2 < posn &&
565 !memcmp(data + i, "../", 3)) {
566 do {
567 serverlen--;
568 } while (serverlen &&
569 base[serverlen - 1] != '/');
570 i += 3;
572 // If the server got removed, give up.
573 okay = strchr(base, ':') - base + 3 <
574 serverlen;
575 } else if (alt_req->http_specific) {
576 char *colon = strchr(data + i, ':');
577 char *slash = strchr(data + i, '/');
578 if (colon && slash && colon < data + posn &&
579 slash < data + posn && colon < slash) {
580 okay = 1;
583 // skip 'objects' at end
584 if (okay) {
585 target = xmalloc(serverlen + posn - i - 6);
586 strncpy(target, base, serverlen);
587 strncpy(target + serverlen, data + i,
588 posn - i - 7);
589 target[serverlen + posn - i - 7] = '\0';
590 if (get_verbosely)
591 fprintf(stderr,
592 "Also look at %s\n", target);
593 newalt = xmalloc(sizeof(*newalt));
594 newalt->next = NULL;
595 newalt->base = target;
596 newalt->got_indices = 0;
597 newalt->packs = NULL;
598 path = strstr(target, "//");
599 if (path) {
600 path = strchr(path+2, '/');
601 if (path)
602 newalt->path_len = strlen(path);
605 while (tail->next != NULL)
606 tail = tail->next;
607 tail->next = newalt;
610 i = posn + 1;
613 got_alternates = 1;
616 static void fetch_alternates(char *base)
618 struct buffer buffer;
619 char *url;
620 char *data;
621 struct active_request_slot *slot;
622 struct alternates_request alt_req;
624 /* If another request has already started fetching alternates,
625 wait for them to arrive and return to processing this request's
626 curl message */
627 #ifdef USE_CURL_MULTI
628 while (got_alternates == 0) {
629 step_active_slots();
631 #endif
633 /* Nothing to do if they've already been fetched */
634 if (got_alternates == 1)
635 return;
637 /* Start the fetch */
638 got_alternates = 0;
640 data = xmalloc(4096);
641 buffer.size = 4096;
642 buffer.posn = 0;
643 buffer.buffer = data;
645 if (get_verbosely)
646 fprintf(stderr, "Getting alternates list for %s\n", base);
648 url = xmalloc(strlen(base) + 31);
649 sprintf(url, "%s/objects/info/http-alternates", base);
651 /* Use a callback to process the result, since another request
652 may fail and need to have alternates loaded before continuing */
653 slot = get_active_slot();
654 slot->callback_func = process_alternates_response;
655 slot->callback_data = &alt_req;
657 curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
658 curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
659 curl_easy_setopt(slot->curl, CURLOPT_URL, url);
661 alt_req.base = base;
662 alt_req.url = url;
663 alt_req.buffer = &buffer;
664 alt_req.http_specific = 1;
665 alt_req.slot = slot;
667 if (start_active_slot(slot))
668 run_active_slot(slot);
669 else
670 got_alternates = -1;
672 free(data);
673 free(url);
676 #ifndef NO_EXPAT
677 static void
678 xml_start_tag(void *userData, const char *name, const char **atts)
680 struct xml_ctx *ctx = (struct xml_ctx *)userData;
681 const char *c = strchr(name, ':');
682 int new_len;
684 if (c == NULL)
685 c = name;
686 else
687 c++;
689 new_len = strlen(ctx->name) + strlen(c) + 2;
691 if (new_len > ctx->len) {
692 ctx->name = xrealloc(ctx->name, new_len);
693 ctx->len = new_len;
695 strcat(ctx->name, ".");
696 strcat(ctx->name, c);
698 if (ctx->cdata) {
699 free(ctx->cdata);
700 ctx->cdata = NULL;
703 ctx->userFunc(ctx, 0);
706 static void
707 xml_end_tag(void *userData, const char *name)
709 struct xml_ctx *ctx = (struct xml_ctx *)userData;
710 const char *c = strchr(name, ':');
711 char *ep;
713 ctx->userFunc(ctx, 1);
715 if (c == NULL)
716 c = name;
717 else
718 c++;
720 ep = ctx->name + strlen(ctx->name) - strlen(c) - 1;
721 *ep = 0;
724 static void
725 xml_cdata(void *userData, const XML_Char *s, int len)
727 struct xml_ctx *ctx = (struct xml_ctx *)userData;
728 if (ctx->cdata)
729 free(ctx->cdata);
730 ctx->cdata = xcalloc(len+1, 1);
731 strncpy(ctx->cdata, s, len);
/* Forward declaration: handle_remote_ls_ctx() recurses into remote_ls(). */
static int remote_ls(struct alt_base *repo, const char *path, int flags,
		     void (*userFunc)(struct remote_ls_ctx *ls),
		     void *userData);
738 static void handle_remote_ls_ctx(struct xml_ctx *ctx, int tag_closed)
740 struct remote_ls_ctx *ls = (struct remote_ls_ctx *)ctx->userData;
742 if (tag_closed) {
743 if (!strcmp(ctx->name, DAV_PROPFIND_RESP) && ls->dentry_name) {
744 if (ls->dentry_flags & IS_DIR) {
745 if (ls->flags & PROCESS_DIRS) {
746 ls->userFunc(ls);
748 if (strcmp(ls->dentry_name, ls->path) &&
749 ls->flags & RECURSIVE) {
750 ls->rc = remote_ls(ls->repo,
751 ls->dentry_name,
752 ls->flags,
753 ls->userFunc,
754 ls->userData);
756 } else if (ls->flags & PROCESS_FILES) {
757 ls->userFunc(ls);
759 } else if (!strcmp(ctx->name, DAV_PROPFIND_NAME) && ctx->cdata) {
760 ls->dentry_name = xmalloc(strlen(ctx->cdata) -
761 ls->repo->path_len + 1);
762 strcpy(ls->dentry_name, ctx->cdata + ls->repo->path_len);
763 } else if (!strcmp(ctx->name, DAV_PROPFIND_COLLECTION)) {
764 ls->dentry_flags |= IS_DIR;
766 } else if (!strcmp(ctx->name, DAV_PROPFIND_RESP)) {
767 if (ls->dentry_name) {
768 free(ls->dentry_name);
770 ls->dentry_name = NULL;
771 ls->dentry_flags = 0;
775 static int remote_ls(struct alt_base *repo, const char *path, int flags,
776 void (*userFunc)(struct remote_ls_ctx *ls),
777 void *userData)
779 char *url = xmalloc(strlen(repo->base) + strlen(path) + 1);
780 struct active_request_slot *slot;
781 struct slot_results results;
782 struct buffer in_buffer;
783 struct buffer out_buffer;
784 char *in_data;
785 char *out_data;
786 XML_Parser parser = XML_ParserCreate(NULL);
787 enum XML_Status result;
788 struct curl_slist *dav_headers = NULL;
789 struct xml_ctx ctx;
790 struct remote_ls_ctx ls;
792 ls.flags = flags;
793 ls.repo = repo;
794 ls.path = strdup(path);
795 ls.dentry_name = NULL;
796 ls.dentry_flags = 0;
797 ls.userData = userData;
798 ls.userFunc = userFunc;
799 ls.rc = 0;
801 sprintf(url, "%s%s", repo->base, path);
803 out_buffer.size = strlen(PROPFIND_ALL_REQUEST);
804 out_data = xmalloc(out_buffer.size + 1);
805 snprintf(out_data, out_buffer.size + 1, PROPFIND_ALL_REQUEST);
806 out_buffer.posn = 0;
807 out_buffer.buffer = out_data;
809 in_buffer.size = 4096;
810 in_data = xmalloc(in_buffer.size);
811 in_buffer.posn = 0;
812 in_buffer.buffer = in_data;
814 dav_headers = curl_slist_append(dav_headers, "Depth: 1");
815 dav_headers = curl_slist_append(dav_headers, "Content-Type: text/xml");
817 slot = get_active_slot();
818 slot->results = &results;
819 curl_easy_setopt(slot->curl, CURLOPT_INFILE, &out_buffer);
820 curl_easy_setopt(slot->curl, CURLOPT_INFILESIZE, out_buffer.size);
821 curl_easy_setopt(slot->curl, CURLOPT_READFUNCTION, fread_buffer);
822 curl_easy_setopt(slot->curl, CURLOPT_FILE, &in_buffer);
823 curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
824 curl_easy_setopt(slot->curl, CURLOPT_URL, url);
825 curl_easy_setopt(slot->curl, CURLOPT_UPLOAD, 1);
826 curl_easy_setopt(slot->curl, CURLOPT_CUSTOMREQUEST, DAV_PROPFIND);
827 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, dav_headers);
829 if (start_active_slot(slot)) {
830 run_active_slot(slot);
831 if (results.curl_result == CURLE_OK) {
832 ctx.name = xcalloc(10, 1);
833 ctx.len = 0;
834 ctx.cdata = NULL;
835 ctx.userFunc = handle_remote_ls_ctx;
836 ctx.userData = &ls;
837 XML_SetUserData(parser, &ctx);
838 XML_SetElementHandler(parser, xml_start_tag,
839 xml_end_tag);
840 XML_SetCharacterDataHandler(parser, xml_cdata);
841 result = XML_Parse(parser, in_buffer.buffer,
842 in_buffer.posn, 1);
843 free(ctx.name);
845 if (result != XML_STATUS_OK) {
846 ls.rc = error("XML error: %s",
847 XML_ErrorString(
848 XML_GetErrorCode(parser)));
850 } else {
851 ls.rc = -1;
853 } else {
854 ls.rc = error("Unable to start PROPFIND request");
857 free(ls.path);
858 free(url);
859 free(out_data);
860 free(in_buffer.buffer);
861 curl_slist_free_all(dav_headers);
863 return ls.rc;
866 static void process_ls_pack(struct remote_ls_ctx *ls)
868 unsigned char sha1[20];
870 if (strlen(ls->dentry_name) == 63 &&
871 !strncmp(ls->dentry_name, "objects/pack/pack-", 18) &&
872 !strncmp(ls->dentry_name+58, ".pack", 5)) {
873 get_sha1_hex(ls->dentry_name + 18, sha1);
874 setup_index(ls->repo, sha1);
877 #endif
879 static int fetch_indices(struct alt_base *repo)
881 unsigned char sha1[20];
882 char *url;
883 struct buffer buffer;
884 char *data;
885 int i = 0;
887 struct active_request_slot *slot;
888 struct slot_results results;
890 if (repo->got_indices)
891 return 0;
893 data = xmalloc(4096);
894 buffer.size = 4096;
895 buffer.posn = 0;
896 buffer.buffer = data;
898 if (get_verbosely)
899 fprintf(stderr, "Getting pack list for %s\n", repo->base);
901 #ifndef NO_EXPAT
902 if (remote_ls(repo, "objects/pack/", PROCESS_FILES,
903 process_ls_pack, NULL) == 0)
904 return 0;
905 #endif
907 url = xmalloc(strlen(repo->base) + 21);
908 sprintf(url, "%s/objects/info/packs", repo->base);
910 slot = get_active_slot();
911 slot->results = &results;
912 curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
913 curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
914 curl_easy_setopt(slot->curl, CURLOPT_URL, url);
915 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, NULL);
916 if (start_active_slot(slot)) {
917 run_active_slot(slot);
918 if (results.curl_result != CURLE_OK) {
919 if (results.http_code == 404 ||
920 results.curl_result == CURLE_FILE_COULDNT_READ_FILE) {
921 repo->got_indices = 1;
922 free(buffer.buffer);
923 return 0;
924 } else {
925 repo->got_indices = 0;
926 free(buffer.buffer);
927 return error("%s", curl_errorstr);
930 } else {
931 repo->got_indices = 0;
932 free(buffer.buffer);
933 return error("Unable to start request");
936 data = buffer.buffer;
937 while (i < buffer.posn) {
938 switch (data[i]) {
939 case 'P':
940 i++;
941 if (i + 52 <= buffer.posn &&
942 !strncmp(data + i, " pack-", 6) &&
943 !strncmp(data + i + 46, ".pack\n", 6)) {
944 get_sha1_hex(data + i + 6, sha1);
945 setup_index(repo, sha1);
946 i += 51;
947 break;
949 default:
950 while (i < buffer.posn && data[i] != '\n')
951 i++;
953 i++;
956 free(buffer.buffer);
957 repo->got_indices = 1;
958 return 0;
961 static int fetch_pack(struct alt_base *repo, unsigned char *sha1)
963 char *url;
964 struct packed_git *target;
965 struct packed_git **lst;
966 FILE *packfile;
967 char *filename;
968 char tmpfile[PATH_MAX];
969 int ret;
970 long prev_posn = 0;
971 char range[RANGE_HEADER_SIZE];
972 struct curl_slist *range_header = NULL;
974 struct active_request_slot *slot;
975 struct slot_results results;
977 if (fetch_indices(repo))
978 return -1;
979 target = find_sha1_pack(sha1, repo->packs);
980 if (!target)
981 return -1;
983 if (get_verbosely) {
984 fprintf(stderr, "Getting pack %s\n",
985 sha1_to_hex(target->sha1));
986 fprintf(stderr, " which contains %s\n",
987 sha1_to_hex(sha1));
990 url = xmalloc(strlen(repo->base) + 65);
991 sprintf(url, "%s/objects/pack/pack-%s.pack",
992 repo->base, sha1_to_hex(target->sha1));
994 filename = sha1_pack_name(target->sha1);
995 snprintf(tmpfile, sizeof(tmpfile), "%s.temp", filename);
996 packfile = fopen(tmpfile, "a");
997 if (!packfile)
998 return error("Unable to open local file %s for pack",
999 filename);
1001 slot = get_active_slot();
1002 slot->results = &results;
1003 curl_easy_setopt(slot->curl, CURLOPT_FILE, packfile);
1004 curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite);
1005 curl_easy_setopt(slot->curl, CURLOPT_URL, url);
1006 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, no_pragma_header);
1007 slot->local = packfile;
1009 /* If there is data present from a previous transfer attempt,
1010 resume where it left off */
1011 prev_posn = ftell(packfile);
1012 if (prev_posn>0) {
1013 if (get_verbosely)
1014 fprintf(stderr,
1015 "Resuming fetch of pack %s at byte %ld\n",
1016 sha1_to_hex(target->sha1), prev_posn);
1017 sprintf(range, "Range: bytes=%ld-", prev_posn);
1018 range_header = curl_slist_append(range_header, range);
1019 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, range_header);
1022 if (start_active_slot(slot)) {
1023 run_active_slot(slot);
1024 if (results.curl_result != CURLE_OK) {
1025 fclose(packfile);
1026 return error("Unable to get pack file %s\n%s", url,
1027 curl_errorstr);
1029 } else {
1030 fclose(packfile);
1031 return error("Unable to start request");
1034 fclose(packfile);
1036 ret = move_temp_to_file(tmpfile, filename);
1037 if (ret)
1038 return ret;
1040 lst = &repo->packs;
1041 while (*lst != target)
1042 lst = &((*lst)->next);
1043 *lst = (*lst)->next;
1045 if (verify_pack(target, 0))
1046 return -1;
1047 install_packed_git(target);
1049 return 0;
1052 static void abort_object_request(struct object_request *obj_req)
1054 if (obj_req->local >= 0) {
1055 close(obj_req->local);
1056 obj_req->local = -1;
1058 unlink(obj_req->tmpfile);
1059 if (obj_req->slot) {
1060 release_active_slot(obj_req->slot);
1061 obj_req->slot = NULL;
1063 release_object_request(obj_req);
1066 static int fetch_object(struct alt_base *repo, unsigned char *sha1)
1068 char *hex = sha1_to_hex(sha1);
1069 int ret = 0;
1070 struct object_request *obj_req = object_queue_head;
1072 while (obj_req != NULL && memcmp(obj_req->sha1, sha1, 20))
1073 obj_req = obj_req->next;
1074 if (obj_req == NULL)
1075 return error("Couldn't find request for %s in the queue", hex);
1077 if (has_sha1_file(obj_req->sha1)) {
1078 abort_object_request(obj_req);
1079 return 0;
1082 #ifdef USE_CURL_MULTI
1083 while (obj_req->state == WAITING) {
1084 step_active_slots();
1086 #else
1087 start_object_request(obj_req);
1088 #endif
1090 while (obj_req->state == ACTIVE) {
1091 run_active_slot(obj_req->slot);
1093 if (obj_req->local != -1) {
1094 close(obj_req->local); obj_req->local = -1;
1097 if (obj_req->state == ABORTED) {
1098 ret = error("Request for %s aborted", hex);
1099 } else if (obj_req->curl_result != CURLE_OK &&
1100 obj_req->http_code != 416) {
1101 if (obj_req->http_code == 404 ||
1102 obj_req->curl_result == CURLE_FILE_COULDNT_READ_FILE)
1103 ret = -1; /* Be silent, it is probably in a pack. */
1104 else
1105 ret = error("%s (curl_result = %d, http_code = %ld, sha1 = %s)",
1106 obj_req->errorstr, obj_req->curl_result,
1107 obj_req->http_code, hex);
1108 } else if (obj_req->zret != Z_STREAM_END) {
1109 corrupt_object_found++;
1110 ret = error("File %s (%s) corrupt", hex, obj_req->url);
1111 } else if (memcmp(obj_req->sha1, obj_req->real_sha1, 20)) {
1112 ret = error("File %s has bad hash", hex);
1113 } else if (obj_req->rename < 0) {
1114 ret = error("unable to write sha1 filename %s",
1115 obj_req->filename);
1118 release_object_request(obj_req);
1119 return ret;
1122 int fetch(unsigned char *sha1)
1124 struct alt_base *altbase = alt;
1126 if (!fetch_object(altbase, sha1))
1127 return 0;
1128 while (altbase) {
1129 if (!fetch_pack(altbase, sha1))
1130 return 0;
1131 fetch_alternates(alt->base);
1132 altbase = altbase->next;
1134 return error("Unable to find %s under %s", sha1_to_hex(sha1),
1135 alt->base);
/*
 * Returns 0 when `ch` may appear unescaped in a ref URL ('/', '-', '.',
 * ASCII letters and digits) and 1 when it must be %-escaped.
 *
 * Written with plain comparisons instead of GNU case ranges so that it
 * builds with any C compiler.
 */
static inline int needs_quote(int ch)
{
	if (ch == '/' || ch == '-' || ch == '.')
		return 0;
	if ((ch >= 'A' && ch <= 'Z') ||
	    (ch >= 'a' && ch <= 'z') ||
	    (ch >= '0' && ch <= '9'))
		return 0;
	return 1;
}
/* Returns the upper-case hexadecimal digit for a value in the range 0..15. */
static inline int hex(int v)
{
	if (v < 10) return '0' + v;
	else return 'A' + v - 10;
}
/*
 * Build "<base>refs/<ref>", %-escaping every byte of `ref` for which
 * needs_quote() is true. Returns a newly allocated string that the
 * caller must free.
 */
static char *quote_ref_url(const char *base, const char *ref)
{
	const char *cp;
	char *dp, *qref;
	int len, baselen, ch;

	baselen = strlen(base);
	len = baselen + 6; /* "refs/" + NUL */
	/* Treat each byte as unsigned so high-bit bytes escape as %80..%FF. */
	for (cp = ref; (ch = (unsigned char)*cp) != 0; cp++, len++)
		if (needs_quote(ch))
			len += 2; /* extra two hex plus replacement % */
	qref = xmalloc(len);
	memcpy(qref, base, baselen);
	memcpy(qref + baselen, "refs/", 5);
	for (cp = ref, dp = qref + baselen + 5; (ch = (unsigned char)*cp) != 0; cp++) {
		if (needs_quote(ch)) {
			*dp++ = '%';
			*dp++ = hex((ch >> 4) & 0xF);
			*dp++ = hex(ch & 0xF);
		}
		else
			*dp++ = ch;
	}
	*dp = 0;

	return qref;
}
1183 int fetch_ref(char *ref, unsigned char *sha1)
1185 char *url;
1186 char hex[42];
1187 struct buffer buffer;
1188 char *base = alt->base;
1189 struct active_request_slot *slot;
1190 struct slot_results results;
1191 buffer.size = 41;
1192 buffer.posn = 0;
1193 buffer.buffer = hex;
1194 hex[41] = '\0';
1196 url = quote_ref_url(base, ref);
1197 slot = get_active_slot();
1198 slot->results = &results;
1199 curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
1200 curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
1201 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, NULL);
1202 curl_easy_setopt(slot->curl, CURLOPT_URL, url);
1203 if (start_active_slot(slot)) {
1204 run_active_slot(slot);
1205 if (results.curl_result != CURLE_OK)
1206 return error("Couldn't get %s for %s\n%s",
1207 url, ref, curl_errorstr);
1208 } else {
1209 return error("Unable to start request");
1212 hex[40] = '\0';
1213 get_sha1_hex(hex, sha1);
1214 return 0;
1217 int main(int argc, char **argv)
1219 char *commit_id;
1220 char *url;
1221 char *path;
1222 int arg = 1;
1223 int rc = 0;
1225 setup_git_directory();
1227 while (arg < argc && argv[arg][0] == '-') {
1228 if (argv[arg][1] == 't') {
1229 get_tree = 1;
1230 } else if (argv[arg][1] == 'c') {
1231 get_history = 1;
1232 } else if (argv[arg][1] == 'a') {
1233 get_all = 1;
1234 get_tree = 1;
1235 get_history = 1;
1236 } else if (argv[arg][1] == 'v') {
1237 get_verbosely = 1;
1238 } else if (argv[arg][1] == 'w') {
1239 write_ref = argv[arg + 1];
1240 arg++;
1241 } else if (!strcmp(argv[arg], "--recover")) {
1242 get_recover = 1;
1244 arg++;
1246 if (argc < arg + 2) {
1247 usage("git-http-fetch [-c] [-t] [-a] [-d] [-v] [--recover] [-w ref] commit-id url");
1248 return 1;
1250 commit_id = argv[arg];
1251 url = argv[arg + 1];
1253 http_init();
1255 no_pragma_header = curl_slist_append(no_pragma_header, "Pragma:");
1257 alt = xmalloc(sizeof(*alt));
1258 alt->base = url;
1259 alt->got_indices = 0;
1260 alt->packs = NULL;
1261 alt->next = NULL;
1262 path = strstr(url, "//");
1263 if (path) {
1264 path = strchr(path+2, '/');
1265 if (path)
1266 alt->path_len = strlen(path);
1269 if (pull(commit_id))
1270 rc = 1;
1272 curl_slist_free_all(no_pragma_header);
1274 http_cleanup();
1276 if (corrupt_object_found) {
1277 fprintf(stderr,
1278 "Some loose object were found to be corrupt, but they might be just\n"
1279 "a false '404 Not Found' error message sent with incorrect HTTP\n"
1280 "status code. Suggest running git fsck-objects.\n");
1282 return rc;