git-tar-tree: devolve git-tar-tree into a wrapper for git-archive
[4msysgit-hv.git] / http-fetch.c
blobfac17607b442c5bf1e1d21c27ae9a2b6c473358c
1 #include "cache.h"
2 #include "commit.h"
3 #include "pack.h"
4 #include "fetch.h"
5 #include "http.h"
7 #ifndef NO_EXPAT
8 #include <expat.h>
/*
 * WebDAV request vocabulary.  The dotted names are the element paths
 * that the XML callbacks build up while walking a PROPFIND reply.
 */
#define DAV_PROPFIND "PROPFIND"
#define DAV_PROPFIND_RESP ".multistatus.response"
#define DAV_PROPFIND_NAME ".multistatus.response.href"
#define DAV_PROPFIND_COLLECTION \
	".multistatus.response.propstat.prop.resourcetype.collection"
#define PROPFIND_ALL_REQUEST \
	"<?xml version=\"1.0\" encoding=\"utf-8\" ?>\n" \
	"<D:propfind xmlns:D=\"DAV:\">\n" \
	"<D:allprop/>\n" \
	"</D:propfind>"

/*
 * Fallback status codes for XML parser headers that do not provide
 * XML_STATUS_OK themselves.
 */
#ifndef XML_STATUS_OK
enum XML_Status {
	XML_STATUS_OK = 1,
	XML_STATUS_ERROR = 0
};
#define XML_STATUS_OK 1
#define XML_STATUS_ERROR 0
#endif

/* Bits accepted in the "flags" argument of remote_ls() */
#define PROCESS_FILES (1u << 0)
#define PROCESS_DIRS (1u << 1)
#define RECURSIVE (1u << 2)

/* Bits remote_ls() reports to its callback in dentry_flags */
#define IS_DIR (1u << 0)
34 #endif
/* Chunk size used when replaying a partially downloaded object file. */
#define PREV_BUF_SIZE 4096
/* Room for a "Range: bytes=<offset>-" request header. */
#define RANGE_HEADER_SIZE 30

/* Set by --stdin: commit ids are read from standard input. */
static int commits_on_stdin = 0;

/*
 * State of the alternates lookup: -1 until a fetch is attempted (and
 * again after a failed one), 0 while one is in flight, 1 once done.
 */
static int got_alternates = -1;

/* Number of loose objects whose zlib stream did not end cleanly. */
static int corrupt_object_found = 0;

/* Header list holding an empty "Pragma:" header; built in main(). */
static struct curl_slist *no_pragma_header = NULL;
/*
 * One repository that objects may be fetched from: the URL given on
 * the command line plus every alternate discovered later.
 */
struct alt_base {
	const char *base;		/* URL prefix of the repository */
	int path_len;			/* length of the path part after "//host" */
	int got_indices;		/* non-zero once the pack list was processed */
	struct packed_git *packs;	/* packs whose index has been fetched */
	struct alt_base *next;		/* next repository to try */
};

/* Head of the repository list; the first entry is created in main(). */
static struct alt_base *alt = NULL;
/* Life cycle of a queued loose-object request. */
enum object_request_state {
	WAITING,	/* queued, transfer not started yet */
	ABORTED,	/* could not be started */
	ACTIVE,		/* transfer in progress */
	COMPLETE,	/* transfer finished, or object already present */
};
64 struct object_request
66 unsigned char sha1[20];
67 struct alt_base *repo;
68 char *url;
69 char filename[PATH_MAX];
70 char tmpfile[PATH_MAX];
71 int local;
72 enum object_request_state state;
73 CURLcode curl_result;
74 char errorstr[CURL_ERROR_SIZE];
75 long http_code;
76 unsigned char real_sha1[20];
77 SHA_CTX c;
78 z_stream stream;
79 int zret;
80 int rename;
81 struct active_request_slot *slot;
82 struct object_request *next;
/* Context handed to the callback that processes an alternates reply. */
struct alternates_request {
	const char *base;		/* repository the alternates belong to */
	char *url;			/* URL buffer, rewritten for the retry */
	struct buffer *buffer;		/* receives the response body */
	struct active_request_slot *slot;	/* slot that ran the request */
	int http_specific;		/* 1: http-alternates, 0: alternates */
};
#ifndef NO_EXPAT
/* Parser state shared by the XML element and character-data callbacks. */
struct xml_ctx {
	char *name;	/* dotted path of the currently open elements */
	int len;	/* size recorded for the name buffer */
	char *cdata;	/* most recent character data, or NULL */
	void (*userFunc)(struct xml_ctx *ctx, int tag_closed);
	void *userData;	/* opaque pointer for userFunc */
};

/* State of one PROPFIND directory listing. */
struct remote_ls_ctx {
	struct alt_base *repo;	/* repository being listed */
	char *path;		/* path that was requested */
	void (*userFunc)(struct remote_ls_ctx *ls);
	void *userData;		/* opaque pointer for userFunc */
	int flags;		/* PROCESS_FILES | PROCESS_DIRS | RECURSIVE */
	char *dentry_name;	/* name of the entry being reported */
	int dentry_flags;	/* IS_DIR when the entry is a collection */
	int rc;			/* result returned by remote_ls() */
	struct remote_ls_ctx *parent;
};
#endif
/* First entry of the singly linked queue of object requests. */
static struct object_request *object_queue_head = NULL;
119 static size_t fwrite_sha1_file(void *ptr, size_t eltsize, size_t nmemb,
120 void *data)
122 unsigned char expn[4096];
123 size_t size = eltsize * nmemb;
124 int posn = 0;
125 struct object_request *obj_req = (struct object_request *)data;
126 do {
127 ssize_t retval = write(obj_req->local,
128 (char *) ptr + posn, size - posn);
129 if (retval < 0)
130 return posn;
131 posn += retval;
132 } while (posn < size);
134 obj_req->stream.avail_in = size;
135 obj_req->stream.next_in = ptr;
136 do {
137 obj_req->stream.next_out = expn;
138 obj_req->stream.avail_out = sizeof(expn);
139 obj_req->zret = inflate(&obj_req->stream, Z_SYNC_FLUSH);
140 SHA1_Update(&obj_req->c, expn,
141 sizeof(expn) - obj_req->stream.avail_out);
142 } while (obj_req->stream.avail_in && obj_req->zret == Z_OK);
143 data_received++;
144 return size;
147 static void fetch_alternates(const char *base);
149 static void process_object_response(void *callback_data);
151 static void start_object_request(struct object_request *obj_req)
153 char *hex = sha1_to_hex(obj_req->sha1);
154 char prevfile[PATH_MAX];
155 char *url;
156 char *posn;
157 int prevlocal;
158 unsigned char prev_buf[PREV_BUF_SIZE];
159 ssize_t prev_read = 0;
160 long prev_posn = 0;
161 char range[RANGE_HEADER_SIZE];
162 struct curl_slist *range_header = NULL;
163 struct active_request_slot *slot;
165 snprintf(prevfile, sizeof(prevfile), "%s.prev", obj_req->filename);
166 unlink(prevfile);
167 rename(obj_req->tmpfile, prevfile);
168 unlink(obj_req->tmpfile);
170 if (obj_req->local != -1)
171 error("fd leakage in start: %d", obj_req->local);
172 obj_req->local = open(obj_req->tmpfile,
173 O_WRONLY | O_CREAT | O_EXCL, 0666);
174 /* This could have failed due to the "lazy directory creation";
175 * try to mkdir the last path component.
177 if (obj_req->local < 0 && errno == ENOENT) {
178 char *dir = strrchr(obj_req->tmpfile, '/');
179 if (dir) {
180 *dir = 0;
181 mkdir(obj_req->tmpfile, 0777);
182 *dir = '/';
184 obj_req->local = open(obj_req->tmpfile,
185 O_WRONLY | O_CREAT | O_EXCL, 0666);
188 if (obj_req->local < 0) {
189 obj_req->state = ABORTED;
190 error("Couldn't create temporary file %s for %s: %s",
191 obj_req->tmpfile, obj_req->filename, strerror(errno));
192 return;
195 memset(&obj_req->stream, 0, sizeof(obj_req->stream));
197 inflateInit(&obj_req->stream);
199 SHA1_Init(&obj_req->c);
201 url = xmalloc(strlen(obj_req->repo->base) + 50);
202 obj_req->url = xmalloc(strlen(obj_req->repo->base) + 50);
203 strcpy(url, obj_req->repo->base);
204 posn = url + strlen(obj_req->repo->base);
205 strcpy(posn, "objects/");
206 posn += 8;
207 memcpy(posn, hex, 2);
208 posn += 2;
209 *(posn++) = '/';
210 strcpy(posn, hex + 2);
211 strcpy(obj_req->url, url);
213 /* If a previous temp file is present, process what was already
214 fetched. */
215 prevlocal = open(prevfile, O_RDONLY);
216 if (prevlocal != -1) {
217 do {
218 prev_read = read(prevlocal, prev_buf, PREV_BUF_SIZE);
219 if (prev_read>0) {
220 if (fwrite_sha1_file(prev_buf,
222 prev_read,
223 obj_req) == prev_read) {
224 prev_posn += prev_read;
225 } else {
226 prev_read = -1;
229 } while (prev_read > 0);
230 close(prevlocal);
232 unlink(prevfile);
234 /* Reset inflate/SHA1 if there was an error reading the previous temp
235 file; also rewind to the beginning of the local file. */
236 if (prev_read == -1) {
237 memset(&obj_req->stream, 0, sizeof(obj_req->stream));
238 inflateInit(&obj_req->stream);
239 SHA1_Init(&obj_req->c);
240 if (prev_posn>0) {
241 prev_posn = 0;
242 lseek(obj_req->local, SEEK_SET, 0);
243 ftruncate(obj_req->local, 0);
247 slot = get_active_slot();
248 slot->callback_func = process_object_response;
249 slot->callback_data = obj_req;
250 obj_req->slot = slot;
252 curl_easy_setopt(slot->curl, CURLOPT_FILE, obj_req);
253 curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_sha1_file);
254 curl_easy_setopt(slot->curl, CURLOPT_ERRORBUFFER, obj_req->errorstr);
255 curl_easy_setopt(slot->curl, CURLOPT_URL, url);
256 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, no_pragma_header);
258 /* If we have successfully processed data from a previous fetch
259 attempt, only fetch the data we don't already have. */
260 if (prev_posn>0) {
261 if (get_verbosely)
262 fprintf(stderr,
263 "Resuming fetch of object %s at byte %ld\n",
264 hex, prev_posn);
265 sprintf(range, "Range: bytes=%ld-", prev_posn);
266 range_header = curl_slist_append(range_header, range);
267 curl_easy_setopt(slot->curl,
268 CURLOPT_HTTPHEADER, range_header);
271 /* Try to get the request started, abort the request on error */
272 obj_req->state = ACTIVE;
273 if (!start_active_slot(slot)) {
274 obj_req->state = ABORTED;
275 obj_req->slot = NULL;
276 close(obj_req->local); obj_req->local = -1;
277 free(obj_req->url);
278 return;
282 static void finish_object_request(struct object_request *obj_req)
284 struct stat st;
286 fchmod(obj_req->local, 0444);
287 close(obj_req->local); obj_req->local = -1;
289 if (obj_req->http_code == 416) {
290 fprintf(stderr, "Warning: requested range invalid; we may already have all the data.\n");
291 } else if (obj_req->curl_result != CURLE_OK) {
292 if (stat(obj_req->tmpfile, &st) == 0)
293 if (st.st_size == 0)
294 unlink(obj_req->tmpfile);
295 return;
298 inflateEnd(&obj_req->stream);
299 SHA1_Final(obj_req->real_sha1, &obj_req->c);
300 if (obj_req->zret != Z_STREAM_END) {
301 unlink(obj_req->tmpfile);
302 return;
304 if (hashcmp(obj_req->sha1, obj_req->real_sha1)) {
305 unlink(obj_req->tmpfile);
306 return;
308 obj_req->rename =
309 move_temp_to_file(obj_req->tmpfile, obj_req->filename);
311 if (obj_req->rename == 0)
312 pull_say("got %s\n", sha1_to_hex(obj_req->sha1));
315 static void process_object_response(void *callback_data)
317 struct object_request *obj_req =
318 (struct object_request *)callback_data;
320 obj_req->curl_result = obj_req->slot->curl_result;
321 obj_req->http_code = obj_req->slot->http_code;
322 obj_req->slot = NULL;
323 obj_req->state = COMPLETE;
325 /* Use alternates if necessary */
326 if (obj_req->http_code == 404 ||
327 obj_req->curl_result == CURLE_FILE_COULDNT_READ_FILE) {
328 fetch_alternates(alt->base);
329 if (obj_req->repo->next != NULL) {
330 obj_req->repo =
331 obj_req->repo->next;
332 close(obj_req->local);
333 obj_req->local = -1;
334 start_object_request(obj_req);
335 return;
339 finish_object_request(obj_req);
342 static void release_object_request(struct object_request *obj_req)
344 struct object_request *entry = object_queue_head;
346 if (obj_req->local != -1)
347 error("fd leakage in release: %d", obj_req->local);
348 if (obj_req == object_queue_head) {
349 object_queue_head = obj_req->next;
350 } else {
351 while (entry->next != NULL && entry->next != obj_req)
352 entry = entry->next;
353 if (entry->next == obj_req)
354 entry->next = entry->next->next;
357 free(obj_req->url);
358 free(obj_req);
#ifdef USE_CURL_MULTI
/*
 * Start queued object requests while request capacity remains, then
 * close the curl handle of every slot that is not in use.
 */
void fill_active_slots(void)
{
	struct object_request *req;
	struct active_request_slot *idle;
	int num_transfers;

	for (req = object_queue_head;
	     active_requests < max_requests && req != NULL;
	     req = req->next) {
		if (req->state != WAITING)
			continue;
		if (has_sha1_file(req->sha1))
			req->state = COMPLETE;	/* nothing to download */
		else
			start_object_request(req);
		curl_multi_perform(curlm, &num_transfers);
	}

	for (idle = active_queue_head; idle != NULL; idle = idle->next) {
		if (idle->in_use || idle->curl == NULL)
			continue;
		curl_easy_cleanup(idle->curl);
		idle->curl = NULL;
	}
}
#endif
389 void prefetch(unsigned char *sha1)
391 struct object_request *newreq;
392 struct object_request *tail;
393 char *filename = sha1_file_name(sha1);
395 newreq = xmalloc(sizeof(*newreq));
396 hashcpy(newreq->sha1, sha1);
397 newreq->repo = alt;
398 newreq->url = NULL;
399 newreq->local = -1;
400 newreq->state = WAITING;
401 snprintf(newreq->filename, sizeof(newreq->filename), "%s", filename);
402 snprintf(newreq->tmpfile, sizeof(newreq->tmpfile),
403 "%s.temp", filename);
404 newreq->slot = NULL;
405 newreq->next = NULL;
407 if (object_queue_head == NULL) {
408 object_queue_head = newreq;
409 } else {
410 tail = object_queue_head;
411 while (tail->next != NULL) {
412 tail = tail->next;
414 tail->next = newreq;
417 #ifdef USE_CURL_MULTI
418 fill_active_slots();
419 step_active_slots();
420 #endif
423 static int fetch_index(struct alt_base *repo, unsigned char *sha1)
425 char *hex = sha1_to_hex(sha1);
426 char *filename;
427 char *url;
428 char tmpfile[PATH_MAX];
429 long prev_posn = 0;
430 char range[RANGE_HEADER_SIZE];
431 struct curl_slist *range_header = NULL;
433 FILE *indexfile;
434 struct active_request_slot *slot;
435 struct slot_results results;
437 if (has_pack_index(sha1))
438 return 0;
440 if (get_verbosely)
441 fprintf(stderr, "Getting index for pack %s\n", hex);
443 url = xmalloc(strlen(repo->base) + 64);
444 sprintf(url, "%s/objects/pack/pack-%s.idx", repo->base, hex);
446 filename = sha1_pack_index_name(sha1);
447 snprintf(tmpfile, sizeof(tmpfile), "%s.temp", filename);
448 indexfile = fopen(tmpfile, "a");
449 if (!indexfile)
450 return error("Unable to open local file %s for pack index",
451 filename);
453 slot = get_active_slot();
454 slot->results = &results;
455 curl_easy_setopt(slot->curl, CURLOPT_FILE, indexfile);
456 curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite);
457 curl_easy_setopt(slot->curl, CURLOPT_URL, url);
458 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, no_pragma_header);
459 slot->local = indexfile;
461 /* If there is data present from a previous transfer attempt,
462 resume where it left off */
463 prev_posn = ftell(indexfile);
464 if (prev_posn>0) {
465 if (get_verbosely)
466 fprintf(stderr,
467 "Resuming fetch of index for pack %s at byte %ld\n",
468 hex, prev_posn);
469 sprintf(range, "Range: bytes=%ld-", prev_posn);
470 range_header = curl_slist_append(range_header, range);
471 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, range_header);
474 if (start_active_slot(slot)) {
475 run_active_slot(slot);
476 if (results.curl_result != CURLE_OK) {
477 fclose(indexfile);
478 return error("Unable to get pack index %s\n%s", url,
479 curl_errorstr);
481 } else {
482 fclose(indexfile);
483 return error("Unable to start request");
486 fclose(indexfile);
488 return move_temp_to_file(tmpfile, filename);
491 static int setup_index(struct alt_base *repo, unsigned char *sha1)
493 struct packed_git *new_pack;
494 if (has_pack_file(sha1))
495 return 0; /* don't list this as something we can get */
497 if (fetch_index(repo, sha1))
498 return -1;
500 new_pack = parse_pack_index(sha1);
501 new_pack->next = repo->packs;
502 repo->packs = new_pack;
503 return 0;
506 static void process_alternates_response(void *callback_data)
508 struct alternates_request *alt_req =
509 (struct alternates_request *)callback_data;
510 struct active_request_slot *slot = alt_req->slot;
511 struct alt_base *tail = alt;
512 const char *base = alt_req->base;
513 static const char null_byte = '\0';
514 char *data;
515 int i = 0;
517 if (alt_req->http_specific) {
518 if (slot->curl_result != CURLE_OK ||
519 !alt_req->buffer->posn) {
521 /* Try reusing the slot to get non-http alternates */
522 alt_req->http_specific = 0;
523 sprintf(alt_req->url, "%s/objects/info/alternates",
524 base);
525 curl_easy_setopt(slot->curl, CURLOPT_URL,
526 alt_req->url);
527 active_requests++;
528 slot->in_use = 1;
529 if (slot->finished != NULL)
530 (*slot->finished) = 0;
531 if (!start_active_slot(slot)) {
532 got_alternates = -1;
533 slot->in_use = 0;
534 if (slot->finished != NULL)
535 (*slot->finished) = 1;
537 return;
539 } else if (slot->curl_result != CURLE_OK) {
540 if (slot->http_code != 404 &&
541 slot->curl_result != CURLE_FILE_COULDNT_READ_FILE) {
542 got_alternates = -1;
543 return;
547 fwrite_buffer(&null_byte, 1, 1, alt_req->buffer);
548 alt_req->buffer->posn--;
549 data = alt_req->buffer->buffer;
551 while (i < alt_req->buffer->posn) {
552 int posn = i;
553 while (posn < alt_req->buffer->posn && data[posn] != '\n')
554 posn++;
555 if (data[posn] == '\n') {
556 int okay = 0;
557 int serverlen = 0;
558 struct alt_base *newalt;
559 char *target = NULL;
560 char *path;
561 if (data[i] == '/') {
562 serverlen = strchr(base + 8, '/') - base;
563 okay = 1;
564 } else if (!memcmp(data + i, "../", 3)) {
565 i += 3;
566 serverlen = strlen(base);
567 while (i + 2 < posn &&
568 !memcmp(data + i, "../", 3)) {
569 do {
570 serverlen--;
571 } while (serverlen &&
572 base[serverlen - 1] != '/');
573 i += 3;
575 /* If the server got removed, give up. */
576 okay = strchr(base, ':') - base + 3 <
577 serverlen;
578 } else if (alt_req->http_specific) {
579 char *colon = strchr(data + i, ':');
580 char *slash = strchr(data + i, '/');
581 if (colon && slash && colon < data + posn &&
582 slash < data + posn && colon < slash) {
583 okay = 1;
586 /* skip 'objects' at end */
587 if (okay) {
588 target = xmalloc(serverlen + posn - i - 6);
589 strlcpy(target, base, serverlen);
590 strlcpy(target + serverlen, data + i, posn - i - 6);
591 if (get_verbosely)
592 fprintf(stderr,
593 "Also look at %s\n", target);
594 newalt = xmalloc(sizeof(*newalt));
595 newalt->next = NULL;
596 newalt->base = target;
597 newalt->got_indices = 0;
598 newalt->packs = NULL;
599 path = strstr(target, "//");
600 if (path) {
601 path = strchr(path+2, '/');
602 if (path)
603 newalt->path_len = strlen(path);
606 while (tail->next != NULL)
607 tail = tail->next;
608 tail->next = newalt;
611 i = posn + 1;
614 got_alternates = 1;
617 static void fetch_alternates(const char *base)
619 struct buffer buffer;
620 char *url;
621 char *data;
622 struct active_request_slot *slot;
623 struct alternates_request alt_req;
625 /* If another request has already started fetching alternates,
626 wait for them to arrive and return to processing this request's
627 curl message */
628 #ifdef USE_CURL_MULTI
629 while (got_alternates == 0) {
630 step_active_slots();
632 #endif
634 /* Nothing to do if they've already been fetched */
635 if (got_alternates == 1)
636 return;
638 /* Start the fetch */
639 got_alternates = 0;
641 data = xmalloc(4096);
642 buffer.size = 4096;
643 buffer.posn = 0;
644 buffer.buffer = data;
646 if (get_verbosely)
647 fprintf(stderr, "Getting alternates list for %s\n", base);
649 url = xmalloc(strlen(base) + 31);
650 sprintf(url, "%s/objects/info/http-alternates", base);
652 /* Use a callback to process the result, since another request
653 may fail and need to have alternates loaded before continuing */
654 slot = get_active_slot();
655 slot->callback_func = process_alternates_response;
656 slot->callback_data = &alt_req;
658 curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
659 curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
660 curl_easy_setopt(slot->curl, CURLOPT_URL, url);
662 alt_req.base = base;
663 alt_req.url = url;
664 alt_req.buffer = &buffer;
665 alt_req.http_specific = 1;
666 alt_req.slot = slot;
668 if (start_active_slot(slot))
669 run_active_slot(slot);
670 else
671 got_alternates = -1;
673 free(data);
674 free(url);
677 #ifndef NO_EXPAT
678 static void
679 xml_start_tag(void *userData, const char *name, const char **atts)
681 struct xml_ctx *ctx = (struct xml_ctx *)userData;
682 const char *c = strchr(name, ':');
683 int new_len;
685 if (c == NULL)
686 c = name;
687 else
688 c++;
690 new_len = strlen(ctx->name) + strlen(c) + 2;
692 if (new_len > ctx->len) {
693 ctx->name = xrealloc(ctx->name, new_len);
694 ctx->len = new_len;
696 strcat(ctx->name, ".");
697 strcat(ctx->name, c);
699 free(ctx->cdata);
700 ctx->cdata = NULL;
702 ctx->userFunc(ctx, 0);
705 static void
706 xml_end_tag(void *userData, const char *name)
708 struct xml_ctx *ctx = (struct xml_ctx *)userData;
709 const char *c = strchr(name, ':');
710 char *ep;
712 ctx->userFunc(ctx, 1);
714 if (c == NULL)
715 c = name;
716 else
717 c++;
719 ep = ctx->name + strlen(ctx->name) - strlen(c) - 1;
720 *ep = 0;
723 static void
724 xml_cdata(void *userData, const XML_Char *s, int len)
726 struct xml_ctx *ctx = (struct xml_ctx *)userData;
727 free(ctx->cdata);
728 ctx->cdata = xmalloc(len + 1);
729 strlcpy(ctx->cdata, s, len + 1);
732 static int remote_ls(struct alt_base *repo, const char *path, int flags,
733 void (*userFunc)(struct remote_ls_ctx *ls),
734 void *userData);
736 static void handle_remote_ls_ctx(struct xml_ctx *ctx, int tag_closed)
738 struct remote_ls_ctx *ls = (struct remote_ls_ctx *)ctx->userData;
740 if (tag_closed) {
741 if (!strcmp(ctx->name, DAV_PROPFIND_RESP) && ls->dentry_name) {
742 if (ls->dentry_flags & IS_DIR) {
743 if (ls->flags & PROCESS_DIRS) {
744 ls->userFunc(ls);
746 if (strcmp(ls->dentry_name, ls->path) &&
747 ls->flags & RECURSIVE) {
748 ls->rc = remote_ls(ls->repo,
749 ls->dentry_name,
750 ls->flags,
751 ls->userFunc,
752 ls->userData);
754 } else if (ls->flags & PROCESS_FILES) {
755 ls->userFunc(ls);
757 } else if (!strcmp(ctx->name, DAV_PROPFIND_NAME) && ctx->cdata) {
758 ls->dentry_name = xmalloc(strlen(ctx->cdata) -
759 ls->repo->path_len + 1);
760 strcpy(ls->dentry_name, ctx->cdata + ls->repo->path_len);
761 } else if (!strcmp(ctx->name, DAV_PROPFIND_COLLECTION)) {
762 ls->dentry_flags |= IS_DIR;
764 } else if (!strcmp(ctx->name, DAV_PROPFIND_RESP)) {
765 free(ls->dentry_name);
766 ls->dentry_name = NULL;
767 ls->dentry_flags = 0;
771 static int remote_ls(struct alt_base *repo, const char *path, int flags,
772 void (*userFunc)(struct remote_ls_ctx *ls),
773 void *userData)
775 char *url = xmalloc(strlen(repo->base) + strlen(path) + 1);
776 struct active_request_slot *slot;
777 struct slot_results results;
778 struct buffer in_buffer;
779 struct buffer out_buffer;
780 char *in_data;
781 char *out_data;
782 XML_Parser parser = XML_ParserCreate(NULL);
783 enum XML_Status result;
784 struct curl_slist *dav_headers = NULL;
785 struct xml_ctx ctx;
786 struct remote_ls_ctx ls;
788 ls.flags = flags;
789 ls.repo = repo;
790 ls.path = xstrdup(path);
791 ls.dentry_name = NULL;
792 ls.dentry_flags = 0;
793 ls.userData = userData;
794 ls.userFunc = userFunc;
795 ls.rc = 0;
797 sprintf(url, "%s%s", repo->base, path);
799 out_buffer.size = strlen(PROPFIND_ALL_REQUEST);
800 out_data = xmalloc(out_buffer.size + 1);
801 snprintf(out_data, out_buffer.size + 1, PROPFIND_ALL_REQUEST);
802 out_buffer.posn = 0;
803 out_buffer.buffer = out_data;
805 in_buffer.size = 4096;
806 in_data = xmalloc(in_buffer.size);
807 in_buffer.posn = 0;
808 in_buffer.buffer = in_data;
810 dav_headers = curl_slist_append(dav_headers, "Depth: 1");
811 dav_headers = curl_slist_append(dav_headers, "Content-Type: text/xml");
813 slot = get_active_slot();
814 slot->results = &results;
815 curl_easy_setopt(slot->curl, CURLOPT_INFILE, &out_buffer);
816 curl_easy_setopt(slot->curl, CURLOPT_INFILESIZE, out_buffer.size);
817 curl_easy_setopt(slot->curl, CURLOPT_READFUNCTION, fread_buffer);
818 curl_easy_setopt(slot->curl, CURLOPT_FILE, &in_buffer);
819 curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
820 curl_easy_setopt(slot->curl, CURLOPT_URL, url);
821 curl_easy_setopt(slot->curl, CURLOPT_UPLOAD, 1);
822 curl_easy_setopt(slot->curl, CURLOPT_CUSTOMREQUEST, DAV_PROPFIND);
823 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, dav_headers);
825 if (start_active_slot(slot)) {
826 run_active_slot(slot);
827 if (results.curl_result == CURLE_OK) {
828 ctx.name = xcalloc(10, 1);
829 ctx.len = 0;
830 ctx.cdata = NULL;
831 ctx.userFunc = handle_remote_ls_ctx;
832 ctx.userData = &ls;
833 XML_SetUserData(parser, &ctx);
834 XML_SetElementHandler(parser, xml_start_tag,
835 xml_end_tag);
836 XML_SetCharacterDataHandler(parser, xml_cdata);
837 result = XML_Parse(parser, in_buffer.buffer,
838 in_buffer.posn, 1);
839 free(ctx.name);
841 if (result != XML_STATUS_OK) {
842 ls.rc = error("XML error: %s",
843 XML_ErrorString(
844 XML_GetErrorCode(parser)));
846 } else {
847 ls.rc = -1;
849 } else {
850 ls.rc = error("Unable to start PROPFIND request");
853 free(ls.path);
854 free(url);
855 free(out_data);
856 free(in_buffer.buffer);
857 curl_slist_free_all(dav_headers);
859 return ls.rc;
862 static void process_ls_pack(struct remote_ls_ctx *ls)
864 unsigned char sha1[20];
866 if (strlen(ls->dentry_name) == 63 &&
867 !strncmp(ls->dentry_name, "objects/pack/pack-", 18) &&
868 has_extension(ls->dentry_name, ".pack")) {
869 get_sha1_hex(ls->dentry_name + 18, sha1);
870 setup_index(ls->repo, sha1);
873 #endif
875 static int fetch_indices(struct alt_base *repo)
877 unsigned char sha1[20];
878 char *url;
879 struct buffer buffer;
880 char *data;
881 int i = 0;
883 struct active_request_slot *slot;
884 struct slot_results results;
886 if (repo->got_indices)
887 return 0;
889 data = xmalloc(4096);
890 buffer.size = 4096;
891 buffer.posn = 0;
892 buffer.buffer = data;
894 if (get_verbosely)
895 fprintf(stderr, "Getting pack list for %s\n", repo->base);
897 #ifndef NO_EXPAT
898 if (remote_ls(repo, "objects/pack/", PROCESS_FILES,
899 process_ls_pack, NULL) == 0)
900 return 0;
901 #endif
903 url = xmalloc(strlen(repo->base) + 21);
904 sprintf(url, "%s/objects/info/packs", repo->base);
906 slot = get_active_slot();
907 slot->results = &results;
908 curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
909 curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
910 curl_easy_setopt(slot->curl, CURLOPT_URL, url);
911 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, NULL);
912 if (start_active_slot(slot)) {
913 run_active_slot(slot);
914 if (results.curl_result != CURLE_OK) {
915 if (results.http_code == 404 ||
916 results.curl_result == CURLE_FILE_COULDNT_READ_FILE) {
917 repo->got_indices = 1;
918 free(buffer.buffer);
919 return 0;
920 } else {
921 repo->got_indices = 0;
922 free(buffer.buffer);
923 return error("%s", curl_errorstr);
926 } else {
927 repo->got_indices = 0;
928 free(buffer.buffer);
929 return error("Unable to start request");
932 data = buffer.buffer;
933 while (i < buffer.posn) {
934 switch (data[i]) {
935 case 'P':
936 i++;
937 if (i + 52 <= buffer.posn &&
938 !strncmp(data + i, " pack-", 6) &&
939 !strncmp(data + i + 46, ".pack\n", 6)) {
940 get_sha1_hex(data + i + 6, sha1);
941 setup_index(repo, sha1);
942 i += 51;
943 break;
945 default:
946 while (i < buffer.posn && data[i] != '\n')
947 i++;
949 i++;
952 free(buffer.buffer);
953 repo->got_indices = 1;
954 return 0;
957 static int fetch_pack(struct alt_base *repo, unsigned char *sha1)
959 char *url;
960 struct packed_git *target;
961 struct packed_git **lst;
962 FILE *packfile;
963 char *filename;
964 char tmpfile[PATH_MAX];
965 int ret;
966 long prev_posn = 0;
967 char range[RANGE_HEADER_SIZE];
968 struct curl_slist *range_header = NULL;
970 struct active_request_slot *slot;
971 struct slot_results results;
973 if (fetch_indices(repo))
974 return -1;
975 target = find_sha1_pack(sha1, repo->packs);
976 if (!target)
977 return -1;
979 if (get_verbosely) {
980 fprintf(stderr, "Getting pack %s\n",
981 sha1_to_hex(target->sha1));
982 fprintf(stderr, " which contains %s\n",
983 sha1_to_hex(sha1));
986 url = xmalloc(strlen(repo->base) + 65);
987 sprintf(url, "%s/objects/pack/pack-%s.pack",
988 repo->base, sha1_to_hex(target->sha1));
990 filename = sha1_pack_name(target->sha1);
991 snprintf(tmpfile, sizeof(tmpfile), "%s.temp", filename);
992 packfile = fopen(tmpfile, "a");
993 if (!packfile)
994 return error("Unable to open local file %s for pack",
995 filename);
997 slot = get_active_slot();
998 slot->results = &results;
999 curl_easy_setopt(slot->curl, CURLOPT_FILE, packfile);
1000 curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite);
1001 curl_easy_setopt(slot->curl, CURLOPT_URL, url);
1002 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, no_pragma_header);
1003 slot->local = packfile;
1005 /* If there is data present from a previous transfer attempt,
1006 resume where it left off */
1007 prev_posn = ftell(packfile);
1008 if (prev_posn>0) {
1009 if (get_verbosely)
1010 fprintf(stderr,
1011 "Resuming fetch of pack %s at byte %ld\n",
1012 sha1_to_hex(target->sha1), prev_posn);
1013 sprintf(range, "Range: bytes=%ld-", prev_posn);
1014 range_header = curl_slist_append(range_header, range);
1015 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, range_header);
1018 if (start_active_slot(slot)) {
1019 run_active_slot(slot);
1020 if (results.curl_result != CURLE_OK) {
1021 fclose(packfile);
1022 return error("Unable to get pack file %s\n%s", url,
1023 curl_errorstr);
1025 } else {
1026 fclose(packfile);
1027 return error("Unable to start request");
1030 fclose(packfile);
1032 ret = move_temp_to_file(tmpfile, filename);
1033 if (ret)
1034 return ret;
1036 lst = &repo->packs;
1037 while (*lst != target)
1038 lst = &((*lst)->next);
1039 *lst = (*lst)->next;
1041 if (verify_pack(target, 0))
1042 return -1;
1043 install_packed_git(target);
1045 return 0;
1048 static void abort_object_request(struct object_request *obj_req)
1050 if (obj_req->local >= 0) {
1051 close(obj_req->local);
1052 obj_req->local = -1;
1054 unlink(obj_req->tmpfile);
1055 if (obj_req->slot) {
1056 release_active_slot(obj_req->slot);
1057 obj_req->slot = NULL;
1059 release_object_request(obj_req);
1062 static int fetch_object(struct alt_base *repo, unsigned char *sha1)
1064 char *hex = sha1_to_hex(sha1);
1065 int ret = 0;
1066 struct object_request *obj_req = object_queue_head;
1068 while (obj_req != NULL && hashcmp(obj_req->sha1, sha1))
1069 obj_req = obj_req->next;
1070 if (obj_req == NULL)
1071 return error("Couldn't find request for %s in the queue", hex);
1073 if (has_sha1_file(obj_req->sha1)) {
1074 abort_object_request(obj_req);
1075 return 0;
1078 #ifdef USE_CURL_MULTI
1079 while (obj_req->state == WAITING) {
1080 step_active_slots();
1082 #else
1083 start_object_request(obj_req);
1084 #endif
1086 while (obj_req->state == ACTIVE) {
1087 run_active_slot(obj_req->slot);
1089 if (obj_req->local != -1) {
1090 close(obj_req->local); obj_req->local = -1;
1093 if (obj_req->state == ABORTED) {
1094 ret = error("Request for %s aborted", hex);
1095 } else if (obj_req->curl_result != CURLE_OK &&
1096 obj_req->http_code != 416) {
1097 if (obj_req->http_code == 404 ||
1098 obj_req->curl_result == CURLE_FILE_COULDNT_READ_FILE)
1099 ret = -1; /* Be silent, it is probably in a pack. */
1100 else
1101 ret = error("%s (curl_result = %d, http_code = %ld, sha1 = %s)",
1102 obj_req->errorstr, obj_req->curl_result,
1103 obj_req->http_code, hex);
1104 } else if (obj_req->zret != Z_STREAM_END) {
1105 corrupt_object_found++;
1106 ret = error("File %s (%s) corrupt", hex, obj_req->url);
1107 } else if (hashcmp(obj_req->sha1, obj_req->real_sha1)) {
1108 ret = error("File %s has bad hash", hex);
1109 } else if (obj_req->rename < 0) {
1110 ret = error("unable to write sha1 filename %s",
1111 obj_req->filename);
1114 release_object_request(obj_req);
1115 return ret;
1118 int fetch(unsigned char *sha1)
1120 struct alt_base *altbase = alt;
1122 if (!fetch_object(altbase, sha1))
1123 return 0;
1124 while (altbase) {
1125 if (!fetch_pack(altbase, sha1))
1126 return 0;
1127 fetch_alternates(alt->base);
1128 altbase = altbase->next;
1130 return error("Unable to find %s under %s", sha1_to_hex(sha1),
1131 alt->base);
/*
 * Return 0 when "ch" may appear unescaped in a ref URL (ASCII letters
 * and digits, '/', '-' and '.') and 1 when it must be %-escaped.
 */
static inline int needs_quote(int ch)
{
	if (ch >= 'A' && ch <= 'Z')
		return 0;
	if (ch >= 'a' && ch <= 'z')
		return 0;
	if (ch >= '0' && ch <= '9')
		return 0;
	switch (ch) {
	case '/':
	case '-':
	case '.':
		return 0;
	default:
		return 1;
	}
}
/*
 * Map a value to a hexadecimal digit character: '0'..'9' for values
 * below 10, 'A' upwards for everything else.
 */
static inline int hex(int v)
{
	return v < 10 ? '0' + v : 'A' + v - 10;
}
/*
 * Build the URL of a ref: "<base>refs/<ref>", with every character of
 * "ref" that needs_quote() rejects written as %XX.
 *
 * Returns a newly allocated string that the caller must free.
 */
static char *quote_ref_url(const char *base, const char *ref)
{
	static const char prefix[] = "refs/";
	int baselen = strlen(base);
	int len = baselen + 6; /* "refs/" + NUL */
	const char *src;
	char *url, *out;

	/* One byte per plain character, three per escaped one. */
	for (src = ref; *src != 0; src++)
		len += needs_quote(*src) ? 3 : 1;

	url = xmalloc(len);
	memcpy(url, base, baselen);
	memcpy(url + baselen, prefix, 5);
	out = url + baselen + 5;

	for (src = ref; *src != 0; src++) {
		int ch = *src;

		if (!needs_quote(ch)) {
			*out++ = ch;
			continue;
		}
		*out++ = '%';
		*out++ = hex((ch >> 4) & 0xF);
		*out++ = hex(ch & 0xF);
	}
	*out = 0;

	return url;
}
1180 int fetch_ref(char *ref, unsigned char *sha1)
1182 char *url;
1183 char hex[42];
1184 struct buffer buffer;
1185 const char *base = alt->base;
1186 struct active_request_slot *slot;
1187 struct slot_results results;
1188 buffer.size = 41;
1189 buffer.posn = 0;
1190 buffer.buffer = hex;
1191 hex[41] = '\0';
1193 url = quote_ref_url(base, ref);
1194 slot = get_active_slot();
1195 slot->results = &results;
1196 curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
1197 curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
1198 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, NULL);
1199 curl_easy_setopt(slot->curl, CURLOPT_URL, url);
1200 if (start_active_slot(slot)) {
1201 run_active_slot(slot);
1202 if (results.curl_result != CURLE_OK)
1203 return error("Couldn't get %s for %s\n%s",
1204 url, ref, curl_errorstr);
1205 } else {
1206 return error("Unable to start request");
1209 hex[40] = '\0';
1210 get_sha1_hex(hex, sha1);
1211 return 0;
1214 int main(int argc, const char **argv)
1216 int commits;
1217 const char **write_ref = NULL;
1218 char **commit_id;
1219 const char *url;
1220 char *path;
1221 int arg = 1;
1222 int rc = 0;
1224 setup_ident();
1225 setup_git_directory();
1226 git_config(git_default_config);
1228 while (arg < argc && argv[arg][0] == '-') {
1229 if (argv[arg][1] == 't') {
1230 get_tree = 1;
1231 } else if (argv[arg][1] == 'c') {
1232 get_history = 1;
1233 } else if (argv[arg][1] == 'a') {
1234 get_all = 1;
1235 get_tree = 1;
1236 get_history = 1;
1237 } else if (argv[arg][1] == 'v') {
1238 get_verbosely = 1;
1239 } else if (argv[arg][1] == 'w') {
1240 write_ref = &argv[arg + 1];
1241 arg++;
1242 } else if (!strcmp(argv[arg], "--recover")) {
1243 get_recover = 1;
1244 } else if (!strcmp(argv[arg], "--stdin")) {
1245 commits_on_stdin = 1;
1247 arg++;
1249 if (argc < arg + 2 - commits_on_stdin) {
1250 usage("git-http-fetch [-c] [-t] [-a] [-v] [--recover] [-w ref] [--stdin] commit-id url");
1251 return 1;
1253 if (commits_on_stdin) {
1254 commits = pull_targets_stdin(&commit_id, &write_ref);
1255 } else {
1256 commit_id = (char **) &argv[arg++];
1257 commits = 1;
1259 url = argv[arg];
1261 http_init();
1263 no_pragma_header = curl_slist_append(no_pragma_header, "Pragma:");
1265 alt = xmalloc(sizeof(*alt));
1266 alt->base = url;
1267 alt->got_indices = 0;
1268 alt->packs = NULL;
1269 alt->next = NULL;
1270 path = strstr(url, "//");
1271 if (path) {
1272 path = strchr(path+2, '/');
1273 if (path)
1274 alt->path_len = strlen(path);
1277 if (pull(commits, commit_id, write_ref, url))
1278 rc = 1;
1280 http_cleanup();
1282 curl_slist_free_all(no_pragma_header);
1284 if (commits_on_stdin)
1285 pull_targets_free(commits, commit_id, write_ref);
1287 if (corrupt_object_found) {
1288 fprintf(stderr,
1289 "Some loose object were found to be corrupt, but they might be just\n"
1290 "a false '404 Not Found' error message sent with incorrect HTTP\n"
1291 "status code. Suggest running git fsck-objects.\n");
1293 return rc;