make clean: remove dist-doc targets.
[alt-git.git] / http-fetch.c
blobd3602b7d7d820a37b6395a7aeb96a8dc8368a646
1 #include "cache.h"
2 #include "commit.h"
3 #include "pack.h"
4 #include "fetch.h"
5 #include "http.h"
7 #ifndef NO_EXPAT
8 #include <expat.h>
/*
 * Definitions for DAV requests.
 *
 * The DAV_PROPFIND_* strings are dotted element paths; they are compared
 * against the path that xml_start_tag()/xml_end_tag() maintain in
 * xml_ctx.name (one ".localname" component per open element).
 */
#define DAV_PROPFIND "PROPFIND"
#define DAV_PROPFIND_RESP ".multistatus.response"
#define DAV_PROPFIND_NAME ".multistatus.response.href"
#define DAV_PROPFIND_COLLECTION ".multistatus.response.propstat.prop.resourcetype.collection"
#define PROPFIND_ALL_REQUEST "<?xml version=\"1.0\" encoding=\"utf-8\" ?>\n<D:propfind xmlns:D=\"DAV:\">\n<D:allprop/>\n</D:propfind>"

/*
 * Definitions for processing XML DAV responses.
 * Fallback for expat headers that do not provide enum XML_Status.
 */
#ifndef XML_STATUS_OK
enum XML_Status {
	XML_STATUS_OK = 1,
	XML_STATUS_ERROR = 0
};
#define XML_STATUS_OK    1
#define XML_STATUS_ERROR 0
#endif

/* Flags that control remote_ls processing */
#define PROCESS_FILES (1u << 0)	/* invoke the callback for non-collections */
#define PROCESS_DIRS  (1u << 1)	/* invoke the callback for collections */
#define RECURSIVE     (1u << 2)	/* descend into collections */

/* Flags that remote_ls passes to callback functions */
#define IS_DIR (1u << 0)
34 #endif
/* Chunk size used when replaying a partially downloaded object file. */
#define PREV_BUF_SIZE 4096
/* Size of the stack buffers that hold a formatted "Range:" header. */
#define RANGE_HEADER_SIZE 30

/*
 * State of the alternates lookup: -1 = not fetched (or the last attempt
 * failed), 0 = fetch in progress, 1 = fetched and processed.
 */
static int got_alternates = -1;
/* Count of loose objects whose zlib stream did not end cleanly. */
static int corrupt_object_found = 0;

/* Header list containing an empty "Pragma:" header; built in main(). */
static struct curl_slist *no_pragma_header;
/*
 * One repository that objects can be fetched from.  The primary repository
 * is the head of the list; alternates discovered later are appended.
 */
struct alt_base
{
	char *base;			/* base URL of the repository */
	int path_len;			/* length of the path part of `base` (after the host) */
	int got_indices;		/* non-zero once the pack list has been fetched */
	struct packed_git *packs;	/* packs known to be available from this repo */
	struct alt_base *next;
};

/* Head of the repository list; allocated in main(). */
static struct alt_base *alt = NULL;
/* Lifecycle of a queued loose-object request. */
enum object_request_state {
	WAITING,	/* queued, transfer not started yet */
	ABORTED,	/* could not be started (local file or slot failure) */
	ACTIVE,		/* transfer in progress */
	COMPLETE,	/* transfer finished, or the object was already present */
};
62 struct object_request
64 unsigned char sha1[20];
65 struct alt_base *repo;
66 char *url;
67 char filename[PATH_MAX];
68 char tmpfile[PATH_MAX];
69 int local;
70 enum object_request_state state;
71 CURLcode curl_result;
72 char errorstr[CURL_ERROR_SIZE];
73 long http_code;
74 unsigned char real_sha1[20];
75 SHA_CTX c;
76 z_stream stream;
77 int zret;
78 int rename;
79 struct active_request_slot *slot;
80 struct object_request *next;
/* State shared between fetch_alternates() and its completion callback. */
struct alternates_request {
	char *base;			/* base URL whose alternates are being fetched */
	char *url;			/* request URL; rewritten in place for the retry */
	struct buffer *buffer;		/* receives the response body */
	struct active_request_slot *slot;
	int http_specific;		/* 1 while requesting http-alternates, 0 for alternates */
};
91 #ifndef NO_EXPAT
/* Parser state handed to the expat callbacks. */
struct xml_ctx
{
	char *name;	/* dotted path of the currently open elements */
	int len;	/* allocated size of `name` */
	char *cdata;	/* most recent character data, or NULL */
	void (*userFunc)(struct xml_ctx *ctx, int tag_closed);	/* called on every tag open (0) / close (1) */
	void *userData;
};
/* State of one remote_ls() directory listing. */
struct remote_ls_ctx
{
	struct alt_base *repo;		/* repository being listed */
	char *path;			/* path being listed, relative to repo->base */
	void (*userFunc)(struct remote_ls_ctx *ls);	/* per-entry callback */
	void *userData;
	int flags;			/* PROCESS_FILES / PROCESS_DIRS / RECURSIVE */
	char *dentry_name;		/* name of the entry currently being parsed */
	int dentry_flags;		/* IS_DIR when the entry is a collection */
	int rc;				/* result returned by remote_ls() */
	struct remote_ls_ctx *parent;
};
113 #endif
/* Head of the singly linked queue of loose-object requests. */
static struct object_request *object_queue_head = NULL;
117 static size_t fwrite_sha1_file(void *ptr, size_t eltsize, size_t nmemb,
118 void *data)
120 unsigned char expn[4096];
121 size_t size = eltsize * nmemb;
122 int posn = 0;
123 struct object_request *obj_req = (struct object_request *)data;
124 do {
125 ssize_t retval = write(obj_req->local,
126 ptr + posn, size - posn);
127 if (retval < 0)
128 return posn;
129 posn += retval;
130 } while (posn < size);
132 obj_req->stream.avail_in = size;
133 obj_req->stream.next_in = ptr;
134 do {
135 obj_req->stream.next_out = expn;
136 obj_req->stream.avail_out = sizeof(expn);
137 obj_req->zret = inflate(&obj_req->stream, Z_SYNC_FLUSH);
138 SHA1_Update(&obj_req->c, expn,
139 sizeof(expn) - obj_req->stream.avail_out);
140 } while (obj_req->stream.avail_in && obj_req->zret == Z_OK);
141 data_received++;
142 return size;
145 static void fetch_alternates(char *base);
147 static void process_object_response(void *callback_data);
149 static void start_object_request(struct object_request *obj_req)
151 char *hex = sha1_to_hex(obj_req->sha1);
152 char prevfile[PATH_MAX];
153 char *url;
154 char *posn;
155 int prevlocal;
156 unsigned char prev_buf[PREV_BUF_SIZE];
157 ssize_t prev_read = 0;
158 long prev_posn = 0;
159 char range[RANGE_HEADER_SIZE];
160 struct curl_slist *range_header = NULL;
161 struct active_request_slot *slot;
163 snprintf(prevfile, sizeof(prevfile), "%s.prev", obj_req->filename);
164 unlink(prevfile);
165 rename(obj_req->tmpfile, prevfile);
166 unlink(obj_req->tmpfile);
168 if (obj_req->local != -1)
169 error("fd leakage in start: %d", obj_req->local);
170 obj_req->local = open(obj_req->tmpfile,
171 O_WRONLY | O_CREAT | O_EXCL, 0666);
172 /* This could have failed due to the "lazy directory creation";
173 * try to mkdir the last path component.
175 if (obj_req->local < 0 && errno == ENOENT) {
176 char *dir = strrchr(obj_req->tmpfile, '/');
177 if (dir) {
178 *dir = 0;
179 mkdir(obj_req->tmpfile, 0777);
180 *dir = '/';
182 obj_req->local = open(obj_req->tmpfile,
183 O_WRONLY | O_CREAT | O_EXCL, 0666);
186 if (obj_req->local < 0) {
187 obj_req->state = ABORTED;
188 error("Couldn't create temporary file %s for %s: %s",
189 obj_req->tmpfile, obj_req->filename, strerror(errno));
190 return;
193 memset(&obj_req->stream, 0, sizeof(obj_req->stream));
195 inflateInit(&obj_req->stream);
197 SHA1_Init(&obj_req->c);
199 url = xmalloc(strlen(obj_req->repo->base) + 50);
200 obj_req->url = xmalloc(strlen(obj_req->repo->base) + 50);
201 strcpy(url, obj_req->repo->base);
202 posn = url + strlen(obj_req->repo->base);
203 strcpy(posn, "objects/");
204 posn += 8;
205 memcpy(posn, hex, 2);
206 posn += 2;
207 *(posn++) = '/';
208 strcpy(posn, hex + 2);
209 strcpy(obj_req->url, url);
211 /* If a previous temp file is present, process what was already
212 fetched. */
213 prevlocal = open(prevfile, O_RDONLY);
214 if (prevlocal != -1) {
215 do {
216 prev_read = read(prevlocal, prev_buf, PREV_BUF_SIZE);
217 if (prev_read>0) {
218 if (fwrite_sha1_file(prev_buf,
220 prev_read,
221 obj_req) == prev_read) {
222 prev_posn += prev_read;
223 } else {
224 prev_read = -1;
227 } while (prev_read > 0);
228 close(prevlocal);
230 unlink(prevfile);
232 /* Reset inflate/SHA1 if there was an error reading the previous temp
233 file; also rewind to the beginning of the local file. */
234 if (prev_read == -1) {
235 memset(&obj_req->stream, 0, sizeof(obj_req->stream));
236 inflateInit(&obj_req->stream);
237 SHA1_Init(&obj_req->c);
238 if (prev_posn>0) {
239 prev_posn = 0;
240 lseek(obj_req->local, SEEK_SET, 0);
241 ftruncate(obj_req->local, 0);
245 slot = get_active_slot();
246 slot->callback_func = process_object_response;
247 slot->callback_data = obj_req;
248 obj_req->slot = slot;
250 curl_easy_setopt(slot->curl, CURLOPT_FILE, obj_req);
251 curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_sha1_file);
252 curl_easy_setopt(slot->curl, CURLOPT_ERRORBUFFER, obj_req->errorstr);
253 curl_easy_setopt(slot->curl, CURLOPT_URL, url);
254 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, no_pragma_header);
256 /* If we have successfully processed data from a previous fetch
257 attempt, only fetch the data we don't already have. */
258 if (prev_posn>0) {
259 if (get_verbosely)
260 fprintf(stderr,
261 "Resuming fetch of object %s at byte %ld\n",
262 hex, prev_posn);
263 sprintf(range, "Range: bytes=%ld-", prev_posn);
264 range_header = curl_slist_append(range_header, range);
265 curl_easy_setopt(slot->curl,
266 CURLOPT_HTTPHEADER, range_header);
269 /* Try to get the request started, abort the request on error */
270 obj_req->state = ACTIVE;
271 if (!start_active_slot(slot)) {
272 obj_req->state = ABORTED;
273 obj_req->slot = NULL;
274 close(obj_req->local); obj_req->local = -1;
275 free(obj_req->url);
276 return;
280 static void finish_object_request(struct object_request *obj_req)
282 struct stat st;
284 fchmod(obj_req->local, 0444);
285 close(obj_req->local); obj_req->local = -1;
287 if (obj_req->http_code == 416) {
288 fprintf(stderr, "Warning: requested range invalid; we may already have all the data.\n");
289 } else if (obj_req->curl_result != CURLE_OK) {
290 if (stat(obj_req->tmpfile, &st) == 0)
291 if (st.st_size == 0)
292 unlink(obj_req->tmpfile);
293 return;
296 inflateEnd(&obj_req->stream);
297 SHA1_Final(obj_req->real_sha1, &obj_req->c);
298 if (obj_req->zret != Z_STREAM_END) {
299 unlink(obj_req->tmpfile);
300 return;
302 if (memcmp(obj_req->sha1, obj_req->real_sha1, 20)) {
303 unlink(obj_req->tmpfile);
304 return;
306 obj_req->rename =
307 move_temp_to_file(obj_req->tmpfile, obj_req->filename);
309 if (obj_req->rename == 0)
310 pull_say("got %s\n", sha1_to_hex(obj_req->sha1));
313 static void process_object_response(void *callback_data)
315 struct object_request *obj_req =
316 (struct object_request *)callback_data;
318 obj_req->curl_result = obj_req->slot->curl_result;
319 obj_req->http_code = obj_req->slot->http_code;
320 obj_req->slot = NULL;
321 obj_req->state = COMPLETE;
323 /* Use alternates if necessary */
324 if (obj_req->http_code == 404 ||
325 obj_req->curl_result == CURLE_FILE_COULDNT_READ_FILE) {
326 fetch_alternates(alt->base);
327 if (obj_req->repo->next != NULL) {
328 obj_req->repo =
329 obj_req->repo->next;
330 close(obj_req->local);
331 obj_req->local = -1;
332 start_object_request(obj_req);
333 return;
337 finish_object_request(obj_req);
340 static void release_object_request(struct object_request *obj_req)
342 struct object_request *entry = object_queue_head;
344 if (obj_req->local != -1)
345 error("fd leakage in release: %d", obj_req->local);
346 if (obj_req == object_queue_head) {
347 object_queue_head = obj_req->next;
348 } else {
349 while (entry->next != NULL && entry->next != obj_req)
350 entry = entry->next;
351 if (entry->next == obj_req)
352 entry->next = entry->next->next;
355 free(obj_req->url);
356 free(obj_req);
#ifdef USE_CURL_MULTI
/*
 * Start waiting object requests while request capacity is available, then
 * release the curl handles of slots that are not in use.
 */
void fill_active_slots(void)
{
	/* list head is sampled before any request is started */
	struct active_request_slot *idle = active_queue_head;
	struct object_request *req;
	int running;

	for (req = object_queue_head;
	     active_requests < max_requests && req != NULL;
	     req = req->next) {
		if (req->state != WAITING)
			continue;
		if (has_sha1_file(req->sha1))
			req->state = COMPLETE;
		else
			start_object_request(req);
		curl_multi_perform(curlm, &running);
	}

	for (; idle != NULL; idle = idle->next) {
		if (idle->in_use || idle->curl == NULL)
			continue;
		curl_easy_cleanup(idle->curl);
		idle->curl = NULL;
	}
}
#endif
387 void prefetch(unsigned char *sha1)
389 struct object_request *newreq;
390 struct object_request *tail;
391 char *filename = sha1_file_name(sha1);
393 newreq = xmalloc(sizeof(*newreq));
394 memcpy(newreq->sha1, sha1, 20);
395 newreq->repo = alt;
396 newreq->url = NULL;
397 newreq->local = -1;
398 newreq->state = WAITING;
399 snprintf(newreq->filename, sizeof(newreq->filename), "%s", filename);
400 snprintf(newreq->tmpfile, sizeof(newreq->tmpfile),
401 "%s.temp", filename);
402 newreq->slot = NULL;
403 newreq->next = NULL;
405 if (object_queue_head == NULL) {
406 object_queue_head = newreq;
407 } else {
408 tail = object_queue_head;
409 while (tail->next != NULL) {
410 tail = tail->next;
412 tail->next = newreq;
415 #ifdef USE_CURL_MULTI
416 fill_active_slots();
417 step_active_slots();
418 #endif
421 static int fetch_index(struct alt_base *repo, unsigned char *sha1)
423 char *hex = sha1_to_hex(sha1);
424 char *filename;
425 char *url;
426 char tmpfile[PATH_MAX];
427 long prev_posn = 0;
428 char range[RANGE_HEADER_SIZE];
429 struct curl_slist *range_header = NULL;
431 FILE *indexfile;
432 struct active_request_slot *slot;
433 struct slot_results results;
435 if (has_pack_index(sha1))
436 return 0;
438 if (get_verbosely)
439 fprintf(stderr, "Getting index for pack %s\n", hex);
441 url = xmalloc(strlen(repo->base) + 64);
442 sprintf(url, "%s/objects/pack/pack-%s.idx", repo->base, hex);
444 filename = sha1_pack_index_name(sha1);
445 snprintf(tmpfile, sizeof(tmpfile), "%s.temp", filename);
446 indexfile = fopen(tmpfile, "a");
447 if (!indexfile)
448 return error("Unable to open local file %s for pack index",
449 filename);
451 slot = get_active_slot();
452 slot->results = &results;
453 curl_easy_setopt(slot->curl, CURLOPT_FILE, indexfile);
454 curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite);
455 curl_easy_setopt(slot->curl, CURLOPT_URL, url);
456 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, no_pragma_header);
457 slot->local = indexfile;
459 /* If there is data present from a previous transfer attempt,
460 resume where it left off */
461 prev_posn = ftell(indexfile);
462 if (prev_posn>0) {
463 if (get_verbosely)
464 fprintf(stderr,
465 "Resuming fetch of index for pack %s at byte %ld\n",
466 hex, prev_posn);
467 sprintf(range, "Range: bytes=%ld-", prev_posn);
468 range_header = curl_slist_append(range_header, range);
469 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, range_header);
472 if (start_active_slot(slot)) {
473 run_active_slot(slot);
474 if (results.curl_result != CURLE_OK) {
475 fclose(indexfile);
476 return error("Unable to get pack index %s\n%s", url,
477 curl_errorstr);
479 } else {
480 fclose(indexfile);
481 return error("Unable to start request");
484 fclose(indexfile);
486 return move_temp_to_file(tmpfile, filename);
489 static int setup_index(struct alt_base *repo, unsigned char *sha1)
491 struct packed_git *new_pack;
492 if (has_pack_file(sha1))
493 return 0; // don't list this as something we can get
495 if (fetch_index(repo, sha1))
496 return -1;
498 new_pack = parse_pack_index(sha1);
499 new_pack->next = repo->packs;
500 repo->packs = new_pack;
501 return 0;
504 static void process_alternates_response(void *callback_data)
506 struct alternates_request *alt_req =
507 (struct alternates_request *)callback_data;
508 struct active_request_slot *slot = alt_req->slot;
509 struct alt_base *tail = alt;
510 char *base = alt_req->base;
511 static const char null_byte = '\0';
512 char *data;
513 int i = 0;
515 if (alt_req->http_specific) {
516 if (slot->curl_result != CURLE_OK ||
517 !alt_req->buffer->posn) {
519 /* Try reusing the slot to get non-http alternates */
520 alt_req->http_specific = 0;
521 sprintf(alt_req->url, "%s/objects/info/alternates",
522 base);
523 curl_easy_setopt(slot->curl, CURLOPT_URL,
524 alt_req->url);
525 active_requests++;
526 slot->in_use = 1;
527 if (slot->finished != NULL)
528 (*slot->finished) = 0;
529 if (!start_active_slot(slot)) {
530 got_alternates = -1;
531 slot->in_use = 0;
532 if (slot->finished != NULL)
533 (*slot->finished) = 1;
535 return;
537 } else if (slot->curl_result != CURLE_OK) {
538 if (slot->http_code != 404 &&
539 slot->curl_result != CURLE_FILE_COULDNT_READ_FILE) {
540 got_alternates = -1;
541 return;
545 fwrite_buffer(&null_byte, 1, 1, alt_req->buffer);
546 alt_req->buffer->posn--;
547 data = alt_req->buffer->buffer;
549 while (i < alt_req->buffer->posn) {
550 int posn = i;
551 while (posn < alt_req->buffer->posn && data[posn] != '\n')
552 posn++;
553 if (data[posn] == '\n') {
554 int okay = 0;
555 int serverlen = 0;
556 struct alt_base *newalt;
557 char *target = NULL;
558 char *path;
559 if (data[i] == '/') {
560 serverlen = strchr(base + 8, '/') - base;
561 okay = 1;
562 } else if (!memcmp(data + i, "../", 3)) {
563 i += 3;
564 serverlen = strlen(base);
565 while (i + 2 < posn &&
566 !memcmp(data + i, "../", 3)) {
567 do {
568 serverlen--;
569 } while (serverlen &&
570 base[serverlen - 1] != '/');
571 i += 3;
573 // If the server got removed, give up.
574 okay = strchr(base, ':') - base + 3 <
575 serverlen;
576 } else if (alt_req->http_specific) {
577 char *colon = strchr(data + i, ':');
578 char *slash = strchr(data + i, '/');
579 if (colon && slash && colon < data + posn &&
580 slash < data + posn && colon < slash) {
581 okay = 1;
584 // skip 'objects' at end
585 if (okay) {
586 target = xmalloc(serverlen + posn - i - 6);
587 strncpy(target, base, serverlen);
588 strncpy(target + serverlen, data + i,
589 posn - i - 7);
590 target[serverlen + posn - i - 7] = '\0';
591 if (get_verbosely)
592 fprintf(stderr,
593 "Also look at %s\n", target);
594 newalt = xmalloc(sizeof(*newalt));
595 newalt->next = NULL;
596 newalt->base = target;
597 newalt->got_indices = 0;
598 newalt->packs = NULL;
599 path = strstr(target, "//");
600 if (path) {
601 path = strchr(path+2, '/');
602 if (path)
603 newalt->path_len = strlen(path);
606 while (tail->next != NULL)
607 tail = tail->next;
608 tail->next = newalt;
611 i = posn + 1;
614 got_alternates = 1;
617 static void fetch_alternates(char *base)
619 struct buffer buffer;
620 char *url;
621 char *data;
622 struct active_request_slot *slot;
623 struct alternates_request alt_req;
625 /* If another request has already started fetching alternates,
626 wait for them to arrive and return to processing this request's
627 curl message */
628 #ifdef USE_CURL_MULTI
629 while (got_alternates == 0) {
630 step_active_slots();
632 #endif
634 /* Nothing to do if they've already been fetched */
635 if (got_alternates == 1)
636 return;
638 /* Start the fetch */
639 got_alternates = 0;
641 data = xmalloc(4096);
642 buffer.size = 4096;
643 buffer.posn = 0;
644 buffer.buffer = data;
646 if (get_verbosely)
647 fprintf(stderr, "Getting alternates list for %s\n", base);
649 url = xmalloc(strlen(base) + 31);
650 sprintf(url, "%s/objects/info/http-alternates", base);
652 /* Use a callback to process the result, since another request
653 may fail and need to have alternates loaded before continuing */
654 slot = get_active_slot();
655 slot->callback_func = process_alternates_response;
656 slot->callback_data = &alt_req;
658 curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
659 curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
660 curl_easy_setopt(slot->curl, CURLOPT_URL, url);
662 alt_req.base = base;
663 alt_req.url = url;
664 alt_req.buffer = &buffer;
665 alt_req.http_specific = 1;
666 alt_req.slot = slot;
668 if (start_active_slot(slot))
669 run_active_slot(slot);
670 else
671 got_alternates = -1;
673 free(data);
674 free(url);
677 #ifndef NO_EXPAT
678 static void
679 xml_start_tag(void *userData, const char *name, const char **atts)
681 struct xml_ctx *ctx = (struct xml_ctx *)userData;
682 const char *c = strchr(name, ':');
683 int new_len;
685 if (c == NULL)
686 c = name;
687 else
688 c++;
690 new_len = strlen(ctx->name) + strlen(c) + 2;
692 if (new_len > ctx->len) {
693 ctx->name = xrealloc(ctx->name, new_len);
694 ctx->len = new_len;
696 strcat(ctx->name, ".");
697 strcat(ctx->name, c);
699 if (ctx->cdata) {
700 free(ctx->cdata);
701 ctx->cdata = NULL;
704 ctx->userFunc(ctx, 0);
707 static void
708 xml_end_tag(void *userData, const char *name)
710 struct xml_ctx *ctx = (struct xml_ctx *)userData;
711 const char *c = strchr(name, ':');
712 char *ep;
714 ctx->userFunc(ctx, 1);
716 if (c == NULL)
717 c = name;
718 else
719 c++;
721 ep = ctx->name + strlen(ctx->name) - strlen(c) - 1;
722 *ep = 0;
725 static void
726 xml_cdata(void *userData, const XML_Char *s, int len)
728 struct xml_ctx *ctx = (struct xml_ctx *)userData;
729 if (ctx->cdata)
730 free(ctx->cdata);
731 ctx->cdata = xcalloc(len+1, 1);
732 strncpy(ctx->cdata, s, len);
735 static int remote_ls(struct alt_base *repo, const char *path, int flags,
736 void (*userFunc)(struct remote_ls_ctx *ls),
737 void *userData);
739 static void handle_remote_ls_ctx(struct xml_ctx *ctx, int tag_closed)
741 struct remote_ls_ctx *ls = (struct remote_ls_ctx *)ctx->userData;
743 if (tag_closed) {
744 if (!strcmp(ctx->name, DAV_PROPFIND_RESP) && ls->dentry_name) {
745 if (ls->dentry_flags & IS_DIR) {
746 if (ls->flags & PROCESS_DIRS) {
747 ls->userFunc(ls);
749 if (strcmp(ls->dentry_name, ls->path) &&
750 ls->flags & RECURSIVE) {
751 ls->rc = remote_ls(ls->repo,
752 ls->dentry_name,
753 ls->flags,
754 ls->userFunc,
755 ls->userData);
757 } else if (ls->flags & PROCESS_FILES) {
758 ls->userFunc(ls);
760 } else if (!strcmp(ctx->name, DAV_PROPFIND_NAME) && ctx->cdata) {
761 ls->dentry_name = xmalloc(strlen(ctx->cdata) -
762 ls->repo->path_len + 1);
763 strcpy(ls->dentry_name, ctx->cdata + ls->repo->path_len);
764 } else if (!strcmp(ctx->name, DAV_PROPFIND_COLLECTION)) {
765 ls->dentry_flags |= IS_DIR;
767 } else if (!strcmp(ctx->name, DAV_PROPFIND_RESP)) {
768 if (ls->dentry_name) {
769 free(ls->dentry_name);
771 ls->dentry_name = NULL;
772 ls->dentry_flags = 0;
776 static int remote_ls(struct alt_base *repo, const char *path, int flags,
777 void (*userFunc)(struct remote_ls_ctx *ls),
778 void *userData)
780 char *url = xmalloc(strlen(repo->base) + strlen(path) + 1);
781 struct active_request_slot *slot;
782 struct slot_results results;
783 struct buffer in_buffer;
784 struct buffer out_buffer;
785 char *in_data;
786 char *out_data;
787 XML_Parser parser = XML_ParserCreate(NULL);
788 enum XML_Status result;
789 struct curl_slist *dav_headers = NULL;
790 struct xml_ctx ctx;
791 struct remote_ls_ctx ls;
793 ls.flags = flags;
794 ls.repo = repo;
795 ls.path = strdup(path);
796 ls.dentry_name = NULL;
797 ls.dentry_flags = 0;
798 ls.userData = userData;
799 ls.userFunc = userFunc;
800 ls.rc = 0;
802 sprintf(url, "%s%s", repo->base, path);
804 out_buffer.size = strlen(PROPFIND_ALL_REQUEST);
805 out_data = xmalloc(out_buffer.size + 1);
806 snprintf(out_data, out_buffer.size + 1, PROPFIND_ALL_REQUEST);
807 out_buffer.posn = 0;
808 out_buffer.buffer = out_data;
810 in_buffer.size = 4096;
811 in_data = xmalloc(in_buffer.size);
812 in_buffer.posn = 0;
813 in_buffer.buffer = in_data;
815 dav_headers = curl_slist_append(dav_headers, "Depth: 1");
816 dav_headers = curl_slist_append(dav_headers, "Content-Type: text/xml");
818 slot = get_active_slot();
819 slot->results = &results;
820 curl_easy_setopt(slot->curl, CURLOPT_INFILE, &out_buffer);
821 curl_easy_setopt(slot->curl, CURLOPT_INFILESIZE, out_buffer.size);
822 curl_easy_setopt(slot->curl, CURLOPT_READFUNCTION, fread_buffer);
823 curl_easy_setopt(slot->curl, CURLOPT_FILE, &in_buffer);
824 curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
825 curl_easy_setopt(slot->curl, CURLOPT_URL, url);
826 curl_easy_setopt(slot->curl, CURLOPT_UPLOAD, 1);
827 curl_easy_setopt(slot->curl, CURLOPT_CUSTOMREQUEST, DAV_PROPFIND);
828 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, dav_headers);
830 if (start_active_slot(slot)) {
831 run_active_slot(slot);
832 if (results.curl_result == CURLE_OK) {
833 ctx.name = xcalloc(10, 1);
834 ctx.len = 0;
835 ctx.cdata = NULL;
836 ctx.userFunc = handle_remote_ls_ctx;
837 ctx.userData = &ls;
838 XML_SetUserData(parser, &ctx);
839 XML_SetElementHandler(parser, xml_start_tag,
840 xml_end_tag);
841 XML_SetCharacterDataHandler(parser, xml_cdata);
842 result = XML_Parse(parser, in_buffer.buffer,
843 in_buffer.posn, 1);
844 free(ctx.name);
846 if (result != XML_STATUS_OK) {
847 ls.rc = error("XML error: %s",
848 XML_ErrorString(
849 XML_GetErrorCode(parser)));
851 } else {
852 ls.rc = -1;
854 } else {
855 ls.rc = error("Unable to start PROPFIND request");
858 free(ls.path);
859 free(url);
860 free(out_data);
861 free(in_buffer.buffer);
862 curl_slist_free_all(dav_headers);
864 return ls.rc;
867 static void process_ls_pack(struct remote_ls_ctx *ls)
869 unsigned char sha1[20];
871 if (strlen(ls->dentry_name) == 63 &&
872 !strncmp(ls->dentry_name, "objects/pack/pack-", 18) &&
873 !strncmp(ls->dentry_name+58, ".pack", 5)) {
874 get_sha1_hex(ls->dentry_name + 18, sha1);
875 setup_index(ls->repo, sha1);
878 #endif
880 static int fetch_indices(struct alt_base *repo)
882 unsigned char sha1[20];
883 char *url;
884 struct buffer buffer;
885 char *data;
886 int i = 0;
888 struct active_request_slot *slot;
889 struct slot_results results;
891 if (repo->got_indices)
892 return 0;
894 data = xmalloc(4096);
895 buffer.size = 4096;
896 buffer.posn = 0;
897 buffer.buffer = data;
899 if (get_verbosely)
900 fprintf(stderr, "Getting pack list for %s\n", repo->base);
902 #ifndef NO_EXPAT
903 if (remote_ls(repo, "objects/pack/", PROCESS_FILES,
904 process_ls_pack, NULL) == 0)
905 return 0;
906 #endif
908 url = xmalloc(strlen(repo->base) + 21);
909 sprintf(url, "%s/objects/info/packs", repo->base);
911 slot = get_active_slot();
912 slot->results = &results;
913 curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
914 curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
915 curl_easy_setopt(slot->curl, CURLOPT_URL, url);
916 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, NULL);
917 if (start_active_slot(slot)) {
918 run_active_slot(slot);
919 if (results.curl_result != CURLE_OK) {
920 if (results.http_code == 404 ||
921 results.curl_result == CURLE_FILE_COULDNT_READ_FILE) {
922 repo->got_indices = 1;
923 free(buffer.buffer);
924 return 0;
925 } else {
926 repo->got_indices = 0;
927 free(buffer.buffer);
928 return error("%s", curl_errorstr);
931 } else {
932 repo->got_indices = 0;
933 free(buffer.buffer);
934 return error("Unable to start request");
937 data = buffer.buffer;
938 while (i < buffer.posn) {
939 switch (data[i]) {
940 case 'P':
941 i++;
942 if (i + 52 <= buffer.posn &&
943 !strncmp(data + i, " pack-", 6) &&
944 !strncmp(data + i + 46, ".pack\n", 6)) {
945 get_sha1_hex(data + i + 6, sha1);
946 setup_index(repo, sha1);
947 i += 51;
948 break;
950 default:
951 while (i < buffer.posn && data[i] != '\n')
952 i++;
954 i++;
957 free(buffer.buffer);
958 repo->got_indices = 1;
959 return 0;
962 static int fetch_pack(struct alt_base *repo, unsigned char *sha1)
964 char *url;
965 struct packed_git *target;
966 struct packed_git **lst;
967 FILE *packfile;
968 char *filename;
969 char tmpfile[PATH_MAX];
970 int ret;
971 long prev_posn = 0;
972 char range[RANGE_HEADER_SIZE];
973 struct curl_slist *range_header = NULL;
975 struct active_request_slot *slot;
976 struct slot_results results;
978 if (fetch_indices(repo))
979 return -1;
980 target = find_sha1_pack(sha1, repo->packs);
981 if (!target)
982 return -1;
984 if (get_verbosely) {
985 fprintf(stderr, "Getting pack %s\n",
986 sha1_to_hex(target->sha1));
987 fprintf(stderr, " which contains %s\n",
988 sha1_to_hex(sha1));
991 url = xmalloc(strlen(repo->base) + 65);
992 sprintf(url, "%s/objects/pack/pack-%s.pack",
993 repo->base, sha1_to_hex(target->sha1));
995 filename = sha1_pack_name(target->sha1);
996 snprintf(tmpfile, sizeof(tmpfile), "%s.temp", filename);
997 packfile = fopen(tmpfile, "a");
998 if (!packfile)
999 return error("Unable to open local file %s for pack",
1000 filename);
1002 slot = get_active_slot();
1003 slot->results = &results;
1004 curl_easy_setopt(slot->curl, CURLOPT_FILE, packfile);
1005 curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite);
1006 curl_easy_setopt(slot->curl, CURLOPT_URL, url);
1007 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, no_pragma_header);
1008 slot->local = packfile;
1010 /* If there is data present from a previous transfer attempt,
1011 resume where it left off */
1012 prev_posn = ftell(packfile);
1013 if (prev_posn>0) {
1014 if (get_verbosely)
1015 fprintf(stderr,
1016 "Resuming fetch of pack %s at byte %ld\n",
1017 sha1_to_hex(target->sha1), prev_posn);
1018 sprintf(range, "Range: bytes=%ld-", prev_posn);
1019 range_header = curl_slist_append(range_header, range);
1020 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, range_header);
1023 if (start_active_slot(slot)) {
1024 run_active_slot(slot);
1025 if (results.curl_result != CURLE_OK) {
1026 fclose(packfile);
1027 return error("Unable to get pack file %s\n%s", url,
1028 curl_errorstr);
1030 } else {
1031 fclose(packfile);
1032 return error("Unable to start request");
1035 fclose(packfile);
1037 ret = move_temp_to_file(tmpfile, filename);
1038 if (ret)
1039 return ret;
1041 lst = &repo->packs;
1042 while (*lst != target)
1043 lst = &((*lst)->next);
1044 *lst = (*lst)->next;
1046 if (verify_pack(target, 0))
1047 return -1;
1048 install_packed_git(target);
1050 return 0;
1053 static void abort_object_request(struct object_request *obj_req)
1055 if (obj_req->local >= 0) {
1056 close(obj_req->local);
1057 obj_req->local = -1;
1059 unlink(obj_req->tmpfile);
1060 if (obj_req->slot) {
1061 release_active_slot(obj_req->slot);
1062 obj_req->slot = NULL;
1064 release_object_request(obj_req);
1067 static int fetch_object(struct alt_base *repo, unsigned char *sha1)
1069 char *hex = sha1_to_hex(sha1);
1070 int ret = 0;
1071 struct object_request *obj_req = object_queue_head;
1073 while (obj_req != NULL && memcmp(obj_req->sha1, sha1, 20))
1074 obj_req = obj_req->next;
1075 if (obj_req == NULL)
1076 return error("Couldn't find request for %s in the queue", hex);
1078 if (has_sha1_file(obj_req->sha1)) {
1079 abort_object_request(obj_req);
1080 return 0;
1083 #ifdef USE_CURL_MULTI
1084 while (obj_req->state == WAITING) {
1085 step_active_slots();
1087 #else
1088 start_object_request(obj_req);
1089 #endif
1091 while (obj_req->state == ACTIVE) {
1092 run_active_slot(obj_req->slot);
1094 if (obj_req->local != -1) {
1095 close(obj_req->local); obj_req->local = -1;
1098 if (obj_req->state == ABORTED) {
1099 ret = error("Request for %s aborted", hex);
1100 } else if (obj_req->curl_result != CURLE_OK &&
1101 obj_req->http_code != 416) {
1102 if (obj_req->http_code == 404 ||
1103 obj_req->curl_result == CURLE_FILE_COULDNT_READ_FILE)
1104 ret = -1; /* Be silent, it is probably in a pack. */
1105 else
1106 ret = error("%s (curl_result = %d, http_code = %ld, sha1 = %s)",
1107 obj_req->errorstr, obj_req->curl_result,
1108 obj_req->http_code, hex);
1109 } else if (obj_req->zret != Z_STREAM_END) {
1110 corrupt_object_found++;
1111 ret = error("File %s (%s) corrupt", hex, obj_req->url);
1112 } else if (memcmp(obj_req->sha1, obj_req->real_sha1, 20)) {
1113 ret = error("File %s has bad hash", hex);
1114 } else if (obj_req->rename < 0) {
1115 ret = error("unable to write sha1 filename %s",
1116 obj_req->filename);
1119 release_object_request(obj_req);
1120 return ret;
1123 int fetch(unsigned char *sha1)
1125 struct alt_base *altbase = alt;
1127 if (!fetch_object(altbase, sha1))
1128 return 0;
1129 while (altbase) {
1130 if (!fetch_pack(altbase, sha1))
1131 return 0;
1132 fetch_alternates(alt->base);
1133 altbase = altbase->next;
1135 return error("Unable to find %s under %s", sha1_to_hex(sha1),
1136 alt->base);
/*
 * Return 0 if `ch` may appear unescaped in a ref URL ('/', '-', '.',
 * letters and digits), 1 if it has to be percent-encoded.
 */
static inline int needs_quote(int ch)
{
	if (ch == '/' || ch == '-' || ch == '.')
		return 0;
	if ((ch >= 'A' && ch <= 'Z') ||
	    (ch >= 'a' && ch <= 'z') ||
	    (ch >= '0' && ch <= '9'))
		return 0;
	return 1;
}
/*
 * Return the upper-case hexadecimal digit for the low four bits of `v`.
 * Masking keeps the result a valid digit for any input.
 */
static inline int hex(int v)
{
	v &= 0xF;
	if (v < 10)
		return '0' + v;
	return 'A' + v - 10;
}
/*
 * Build "<base>refs/<ref>" with every character of `ref` that
 * needs_quote() rejects replaced by its %XX escape.  The result is
 * allocated with xmalloc() and owned by the caller.
 */
static char *quote_ref_url(const char *base, const char *ref)
{
	const char *src;
	char *out, *result;
	int total, baselen, ch;

	baselen = strlen(base);

	/* base + "refs/" + NUL, then one or three bytes per ref character */
	total = baselen + 6;
	for (src = ref; (ch = *src) != 0; src++)
		total += needs_quote(ch) ? 3 : 1;

	result = xmalloc(total);
	memcpy(result, base, baselen);
	memcpy(result + baselen, "refs/", 5);

	out = result + baselen + 5;
	for (src = ref; (ch = *src) != 0; src++) {
		if (!needs_quote(ch)) {
			*out++ = ch;
			continue;
		}
		*out++ = '%';
		*out++ = hex((ch >> 4) & 0xF);
		*out++ = hex(ch & 0xF);
	}
	*out = 0;

	return result;
}
1184 int fetch_ref(char *ref, unsigned char *sha1)
1186 char *url;
1187 char hex[42];
1188 struct buffer buffer;
1189 char *base = alt->base;
1190 struct active_request_slot *slot;
1191 struct slot_results results;
1192 buffer.size = 41;
1193 buffer.posn = 0;
1194 buffer.buffer = hex;
1195 hex[41] = '\0';
1197 url = quote_ref_url(base, ref);
1198 slot = get_active_slot();
1199 slot->results = &results;
1200 curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
1201 curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
1202 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, NULL);
1203 curl_easy_setopt(slot->curl, CURLOPT_URL, url);
1204 if (start_active_slot(slot)) {
1205 run_active_slot(slot);
1206 if (results.curl_result != CURLE_OK)
1207 return error("Couldn't get %s for %s\n%s",
1208 url, ref, curl_errorstr);
1209 } else {
1210 return error("Unable to start request");
1213 hex[40] = '\0';
1214 get_sha1_hex(hex, sha1);
1215 return 0;
1218 int main(int argc, char **argv)
1220 char *commit_id;
1221 char *url;
1222 char *path;
1223 int arg = 1;
1224 int rc = 0;
1226 setup_git_directory();
1227 git_config(git_default_config);
1229 while (arg < argc && argv[arg][0] == '-') {
1230 if (argv[arg][1] == 't') {
1231 get_tree = 1;
1232 } else if (argv[arg][1] == 'c') {
1233 get_history = 1;
1234 } else if (argv[arg][1] == 'a') {
1235 get_all = 1;
1236 get_tree = 1;
1237 get_history = 1;
1238 } else if (argv[arg][1] == 'v') {
1239 get_verbosely = 1;
1240 } else if (argv[arg][1] == 'w') {
1241 write_ref = argv[arg + 1];
1242 arg++;
1243 } else if (!strcmp(argv[arg], "--recover")) {
1244 get_recover = 1;
1246 arg++;
1248 if (argc < arg + 2) {
1249 usage("git-http-fetch [-c] [-t] [-a] [-d] [-v] [--recover] [-w ref] commit-id url");
1250 return 1;
1252 commit_id = argv[arg];
1253 url = argv[arg + 1];
1254 write_ref_log_details = url;
1256 http_init();
1258 no_pragma_header = curl_slist_append(no_pragma_header, "Pragma:");
1260 alt = xmalloc(sizeof(*alt));
1261 alt->base = url;
1262 alt->got_indices = 0;
1263 alt->packs = NULL;
1264 alt->next = NULL;
1265 path = strstr(url, "//");
1266 if (path) {
1267 path = strchr(path+2, '/');
1268 if (path)
1269 alt->path_len = strlen(path);
1272 if (pull(commit_id))
1273 rc = 1;
1275 http_cleanup();
1277 curl_slist_free_all(no_pragma_header);
1279 if (corrupt_object_found) {
1280 fprintf(stderr,
1281 "Some loose object were found to be corrupt, but they might be just\n"
1282 "a false '404 Not Found' error message sent with incorrect HTTP\n"
1283 "status code. Suggest running git fsck-objects.\n");
1285 return rc;