Merge branch 'jn/web' into next
[git/repo.git] / http-fetch.c
blob3a2cb5e1fc1b2794f632c5ddbedfa99500081e5a
1 #include "cache.h"
2 #include "commit.h"
3 #include "pack.h"
4 #include "fetch.h"
5 #include "http.h"
7 #ifndef NO_EXPAT
8 #include <expat.h>
/* WebDAV request method and the dotted element paths looked up in replies */
#define DAV_PROPFIND "PROPFIND"
#define DAV_PROPFIND_RESP ".multistatus.response"
#define DAV_PROPFIND_NAME ".multistatus.response.href"
#define DAV_PROPFIND_COLLECTION ".multistatus.response.propstat.prop.resourcetype.collection"

/* PROPFIND request body asking for all properties of the resource */
#define PROPFIND_ALL_REQUEST \
	"<?xml version=\"1.0\" encoding=\"utf-8\" ?>\n" \
	"<D:propfind xmlns:D=\"DAV:\">\n" \
	"<D:allprop/>\n" \
	"</D:propfind>"

/*
 * Fallback for XML parser headers that do not provide XML_STATUS_OK:
 * declare the status enum and matching macros ourselves.
 */
#ifndef XML_STATUS_OK
enum XML_Status {
	XML_STATUS_OK = 1,
	XML_STATUS_ERROR = 0
};
#define XML_STATUS_OK    1
#define XML_STATUS_ERROR 0
#endif

/* remote_ls() behaviour flags (its "flags" argument) */
#define PROCESS_FILES (1u << 0)	/* invoke the callback for non-collections */
#define PROCESS_DIRS  (1u << 1)	/* invoke the callback for collections */
#define RECURSIVE     (1u << 2)	/* descend into collections */

/* Entry flags remote_ls() hands to its callback (dentry_flags) */
#define IS_DIR (1u << 0)
34 #endif
/* Read chunk size used when replaying a partially downloaded object */
#define PREV_BUF_SIZE 4096
/* Size of the stack buffer that holds a "Range: bytes=N-" header line */
#define RANGE_HEADER_SIZE 30

/*
 * State of the alternates list: -1 not fetched yet (or the fetch failed),
 * 0 fetch in progress, 1 list has been processed.
 */
static int got_alternates = -1;

/* Number of loose objects whose zlib stream did not end cleanly */
static int corrupt_object_found;

/* Request header list built in main() from the single entry "Pragma:" */
static struct curl_slist *no_pragma_header;
/*
 * One repository we may fetch from.  The primary repository comes first;
 * alternates discovered later are appended through "next".
 */
struct alt_base
{
	char *base;			/* base URL of the repository */
	int path_len;			/* strlen() of the path part of base (after the host) */
	int got_indices;		/* non-zero once the pack list has been processed */
	struct packed_git *packs;	/* pack indices known for this repository */
	struct alt_base *next;		/* next repository to try, or NULL */
};

/* Head of the repository list; main() fills in the first entry */
static struct alt_base *alt;
/* Life cycle of a queued loose-object request */
enum object_request_state {
	WAITING = 0,	/* queued, transfer not started */
	ABORTED = 1,	/* could not be started */
	ACTIVE = 2,	/* transfer in flight */
	COMPLETE = 3,	/* transfer finished (successfully or not) */
};
62 struct object_request
64 unsigned char sha1[20];
65 struct alt_base *repo;
66 char *url;
67 char filename[PATH_MAX];
68 char tmpfile[PATH_MAX];
69 int local;
70 enum object_request_state state;
71 CURLcode curl_result;
72 char errorstr[CURL_ERROR_SIZE];
73 long http_code;
74 unsigned char real_sha1[20];
75 SHA_CTX c;
76 z_stream stream;
77 int zret;
78 int rename;
79 struct active_request_slot *slot;
80 struct object_request *next;
/* Context handed to process_alternates_response() through the slot callback */
struct alternates_request {
	char *base;			/* base URL the alternates belong to */
	char *url;			/* request URL; rewritten in place for the second attempt */
	struct buffer *buffer;		/* receives the response body */
	struct active_request_slot *slot;
	int http_specific;		/* 1 while asking for http-alternates, 0 for alternates */
};
#ifndef NO_EXPAT
/* Parser state shared by the expat element and character-data handlers */
struct xml_ctx
{
	char *name;	/* dotted path of the open elements, prefixes stripped */
	int len;	/* number of bytes currently allocated for name */
	char *cdata;	/* most recent character data, or NULL */
	/* called on every element open (tag_closed == 0) and close (1) */
	void (*userFunc)(struct xml_ctx *ctx, int tag_closed);
	void *userData;	/* opaque pointer for userFunc */
};

/* State of one remote_ls() directory listing */
struct remote_ls_ctx
{
	struct alt_base *repo;	/* repository being listed */
	char *path;		/* path that was requested */
	/* called for each matching directory entry */
	void (*userFunc)(struct remote_ls_ctx *ls);
	void *userData;		/* opaque pointer for userFunc */
	int flags;		/* PROCESS_FILES / PROCESS_DIRS / RECURSIVE */
	char *dentry_name;	/* name of the entry being parsed, or NULL */
	int dentry_flags;	/* IS_DIR when the entry is a collection */
	int rc;			/* result reported by remote_ls() */
	struct remote_ls_ctx *parent;	/* not assigned by the code in this file section */
};
#endif
/* First queued loose-object request; prefetch() appends to this list */
static struct object_request *object_queue_head;
117 static size_t fwrite_sha1_file(void *ptr, size_t eltsize, size_t nmemb,
118 void *data)
120 unsigned char expn[4096];
121 size_t size = eltsize * nmemb;
122 int posn = 0;
123 struct object_request *obj_req = (struct object_request *)data;
124 do {
125 ssize_t retval = write(obj_req->local,
126 ptr + posn, size - posn);
127 if (retval < 0)
128 return posn;
129 posn += retval;
130 } while (posn < size);
132 obj_req->stream.avail_in = size;
133 obj_req->stream.next_in = ptr;
134 do {
135 obj_req->stream.next_out = expn;
136 obj_req->stream.avail_out = sizeof(expn);
137 obj_req->zret = inflate(&obj_req->stream, Z_SYNC_FLUSH);
138 SHA1_Update(&obj_req->c, expn,
139 sizeof(expn) - obj_req->stream.avail_out);
140 } while (obj_req->stream.avail_in && obj_req->zret == Z_OK);
141 data_received++;
142 return size;
/* Defined further down; needed by the object response handler */
static void fetch_alternates(char *base);

/* Slot completion callback installed by start_object_request() */
static void process_object_response(void *callback_data);
149 static void start_object_request(struct object_request *obj_req)
151 char *hex = sha1_to_hex(obj_req->sha1);
152 char prevfile[PATH_MAX];
153 char *url;
154 char *posn;
155 int prevlocal;
156 unsigned char prev_buf[PREV_BUF_SIZE];
157 ssize_t prev_read = 0;
158 long prev_posn = 0;
159 char range[RANGE_HEADER_SIZE];
160 struct curl_slist *range_header = NULL;
161 struct active_request_slot *slot;
163 snprintf(prevfile, sizeof(prevfile), "%s.prev", obj_req->filename);
164 unlink(prevfile);
165 rename(obj_req->tmpfile, prevfile);
166 unlink(obj_req->tmpfile);
168 if (obj_req->local != -1)
169 error("fd leakage in start: %d", obj_req->local);
170 obj_req->local = open(obj_req->tmpfile,
171 O_WRONLY | O_CREAT | O_EXCL, 0666);
172 /* This could have failed due to the "lazy directory creation";
173 * try to mkdir the last path component.
175 if (obj_req->local < 0 && errno == ENOENT) {
176 char *dir = strrchr(obj_req->tmpfile, '/');
177 if (dir) {
178 *dir = 0;
179 mkdir(obj_req->tmpfile, 0777);
180 *dir = '/';
182 obj_req->local = open(obj_req->tmpfile,
183 O_WRONLY | O_CREAT | O_EXCL, 0666);
186 if (obj_req->local < 0) {
187 obj_req->state = ABORTED;
188 error("Couldn't create temporary file %s for %s: %s",
189 obj_req->tmpfile, obj_req->filename, strerror(errno));
190 return;
193 memset(&obj_req->stream, 0, sizeof(obj_req->stream));
195 inflateInit(&obj_req->stream);
197 SHA1_Init(&obj_req->c);
199 url = xmalloc(strlen(obj_req->repo->base) + 50);
200 obj_req->url = xmalloc(strlen(obj_req->repo->base) + 50);
201 strcpy(url, obj_req->repo->base);
202 posn = url + strlen(obj_req->repo->base);
203 strcpy(posn, "objects/");
204 posn += 8;
205 memcpy(posn, hex, 2);
206 posn += 2;
207 *(posn++) = '/';
208 strcpy(posn, hex + 2);
209 strcpy(obj_req->url, url);
211 /* If a previous temp file is present, process what was already
212 fetched. */
213 prevlocal = open(prevfile, O_RDONLY);
214 if (prevlocal != -1) {
215 do {
216 prev_read = read(prevlocal, prev_buf, PREV_BUF_SIZE);
217 if (prev_read>0) {
218 if (fwrite_sha1_file(prev_buf,
220 prev_read,
221 obj_req) == prev_read) {
222 prev_posn += prev_read;
223 } else {
224 prev_read = -1;
227 } while (prev_read > 0);
228 close(prevlocal);
230 unlink(prevfile);
232 /* Reset inflate/SHA1 if there was an error reading the previous temp
233 file; also rewind to the beginning of the local file. */
234 if (prev_read == -1) {
235 memset(&obj_req->stream, 0, sizeof(obj_req->stream));
236 inflateInit(&obj_req->stream);
237 SHA1_Init(&obj_req->c);
238 if (prev_posn>0) {
239 prev_posn = 0;
240 lseek(obj_req->local, SEEK_SET, 0);
241 ftruncate(obj_req->local, 0);
245 slot = get_active_slot();
246 slot->callback_func = process_object_response;
247 slot->callback_data = obj_req;
248 obj_req->slot = slot;
250 curl_easy_setopt(slot->curl, CURLOPT_FILE, obj_req);
251 curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_sha1_file);
252 curl_easy_setopt(slot->curl, CURLOPT_ERRORBUFFER, obj_req->errorstr);
253 curl_easy_setopt(slot->curl, CURLOPT_URL, url);
254 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, no_pragma_header);
256 /* If we have successfully processed data from a previous fetch
257 attempt, only fetch the data we don't already have. */
258 if (prev_posn>0) {
259 if (get_verbosely)
260 fprintf(stderr,
261 "Resuming fetch of object %s at byte %ld\n",
262 hex, prev_posn);
263 sprintf(range, "Range: bytes=%ld-", prev_posn);
264 range_header = curl_slist_append(range_header, range);
265 curl_easy_setopt(slot->curl,
266 CURLOPT_HTTPHEADER, range_header);
269 /* Try to get the request started, abort the request on error */
270 obj_req->state = ACTIVE;
271 if (!start_active_slot(slot)) {
272 obj_req->state = ABORTED;
273 obj_req->slot = NULL;
274 close(obj_req->local); obj_req->local = -1;
275 free(obj_req->url);
276 return;
280 static void finish_object_request(struct object_request *obj_req)
282 struct stat st;
284 fchmod(obj_req->local, 0444);
285 close(obj_req->local); obj_req->local = -1;
287 if (obj_req->http_code == 416) {
288 fprintf(stderr, "Warning: requested range invalid; we may already have all the data.\n");
289 } else if (obj_req->curl_result != CURLE_OK) {
290 if (stat(obj_req->tmpfile, &st) == 0)
291 if (st.st_size == 0)
292 unlink(obj_req->tmpfile);
293 return;
296 inflateEnd(&obj_req->stream);
297 SHA1_Final(obj_req->real_sha1, &obj_req->c);
298 if (obj_req->zret != Z_STREAM_END) {
299 unlink(obj_req->tmpfile);
300 return;
302 if (memcmp(obj_req->sha1, obj_req->real_sha1, 20)) {
303 unlink(obj_req->tmpfile);
304 return;
306 obj_req->rename =
307 move_temp_to_file(obj_req->tmpfile, obj_req->filename);
309 if (obj_req->rename == 0)
310 pull_say("got %s\n", sha1_to_hex(obj_req->sha1));
313 static void process_object_response(void *callback_data)
315 struct object_request *obj_req =
316 (struct object_request *)callback_data;
318 obj_req->curl_result = obj_req->slot->curl_result;
319 obj_req->http_code = obj_req->slot->http_code;
320 obj_req->slot = NULL;
321 obj_req->state = COMPLETE;
323 /* Use alternates if necessary */
324 if (obj_req->http_code == 404 ||
325 obj_req->curl_result == CURLE_FILE_COULDNT_READ_FILE) {
326 fetch_alternates(alt->base);
327 if (obj_req->repo->next != NULL) {
328 obj_req->repo =
329 obj_req->repo->next;
330 close(obj_req->local);
331 obj_req->local = -1;
332 start_object_request(obj_req);
333 return;
337 finish_object_request(obj_req);
340 static void release_object_request(struct object_request *obj_req)
342 struct object_request *entry = object_queue_head;
344 if (obj_req->local != -1)
345 error("fd leakage in release: %d", obj_req->local);
346 if (obj_req == object_queue_head) {
347 object_queue_head = obj_req->next;
348 } else {
349 while (entry->next != NULL && entry->next != obj_req)
350 entry = entry->next;
351 if (entry->next == obj_req)
352 entry->next = entry->next->next;
355 free(obj_req->url);
356 free(obj_req);
#ifdef USE_CURL_MULTI
/*
 * Start waiting object requests while fewer than max_requests transfers
 * are active, then dispose of the curl handles of slots that are not in
 * use.  Requests whose object is already present are marked COMPLETE
 * without a transfer.
 */
void fill_active_slots(void)
{
	struct object_request *req;
	struct active_request_slot *slot;
	int num_transfers;

	for (req = object_queue_head;
	     active_requests < max_requests && req != NULL;
	     req = req->next) {
		if (req->state != WAITING)
			continue;
		if (has_sha1_file(req->sha1))
			req->state = COMPLETE;
		else
			start_object_request(req);
		curl_multi_perform(curlm, &num_transfers);
	}

	for (slot = active_queue_head; slot != NULL; slot = slot->next) {
		if (slot->in_use || slot->curl == NULL)
			continue;
		curl_easy_cleanup(slot->curl);
		slot->curl = NULL;
	}
}
#endif
387 void prefetch(unsigned char *sha1)
389 struct object_request *newreq;
390 struct object_request *tail;
391 char *filename = sha1_file_name(sha1);
393 newreq = xmalloc(sizeof(*newreq));
394 memcpy(newreq->sha1, sha1, 20);
395 newreq->repo = alt;
396 newreq->url = NULL;
397 newreq->local = -1;
398 newreq->state = WAITING;
399 snprintf(newreq->filename, sizeof(newreq->filename), "%s", filename);
400 snprintf(newreq->tmpfile, sizeof(newreq->tmpfile),
401 "%s.temp", filename);
402 newreq->slot = NULL;
403 newreq->next = NULL;
405 if (object_queue_head == NULL) {
406 object_queue_head = newreq;
407 } else {
408 tail = object_queue_head;
409 while (tail->next != NULL) {
410 tail = tail->next;
412 tail->next = newreq;
415 #ifdef USE_CURL_MULTI
416 fill_active_slots();
417 step_active_slots();
418 #endif
421 static int fetch_index(struct alt_base *repo, unsigned char *sha1)
423 char *hex = sha1_to_hex(sha1);
424 char *filename;
425 char *url;
426 char tmpfile[PATH_MAX];
427 long prev_posn = 0;
428 char range[RANGE_HEADER_SIZE];
429 struct curl_slist *range_header = NULL;
431 FILE *indexfile;
432 struct active_request_slot *slot;
433 struct slot_results results;
435 if (has_pack_index(sha1))
436 return 0;
438 if (get_verbosely)
439 fprintf(stderr, "Getting index for pack %s\n", hex);
441 url = xmalloc(strlen(repo->base) + 64);
442 sprintf(url, "%s/objects/pack/pack-%s.idx", repo->base, hex);
444 filename = sha1_pack_index_name(sha1);
445 snprintf(tmpfile, sizeof(tmpfile), "%s.temp", filename);
446 indexfile = fopen(tmpfile, "a");
447 if (!indexfile)
448 return error("Unable to open local file %s for pack index",
449 filename);
451 slot = get_active_slot();
452 slot->results = &results;
453 curl_easy_setopt(slot->curl, CURLOPT_FILE, indexfile);
454 curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite);
455 curl_easy_setopt(slot->curl, CURLOPT_URL, url);
456 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, no_pragma_header);
457 slot->local = indexfile;
459 /* If there is data present from a previous transfer attempt,
460 resume where it left off */
461 prev_posn = ftell(indexfile);
462 if (prev_posn>0) {
463 if (get_verbosely)
464 fprintf(stderr,
465 "Resuming fetch of index for pack %s at byte %ld\n",
466 hex, prev_posn);
467 sprintf(range, "Range: bytes=%ld-", prev_posn);
468 range_header = curl_slist_append(range_header, range);
469 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, range_header);
472 if (start_active_slot(slot)) {
473 run_active_slot(slot);
474 if (results.curl_result != CURLE_OK) {
475 fclose(indexfile);
476 return error("Unable to get pack index %s\n%s", url,
477 curl_errorstr);
479 } else {
480 fclose(indexfile);
481 return error("Unable to start request");
484 fclose(indexfile);
486 return move_temp_to_file(tmpfile, filename);
489 static int setup_index(struct alt_base *repo, unsigned char *sha1)
491 struct packed_git *new_pack;
492 if (has_pack_file(sha1))
493 return 0; // don't list this as something we can get
495 if (fetch_index(repo, sha1))
496 return -1;
498 new_pack = parse_pack_index(sha1);
499 new_pack->next = repo->packs;
500 repo->packs = new_pack;
501 return 0;
504 static void process_alternates_response(void *callback_data)
506 struct alternates_request *alt_req =
507 (struct alternates_request *)callback_data;
508 struct active_request_slot *slot = alt_req->slot;
509 struct alt_base *tail = alt;
510 char *base = alt_req->base;
511 static const char null_byte = '\0';
512 char *data;
513 int i = 0;
515 if (alt_req->http_specific) {
516 if (slot->curl_result != CURLE_OK ||
517 !alt_req->buffer->posn) {
519 /* Try reusing the slot to get non-http alternates */
520 alt_req->http_specific = 0;
521 sprintf(alt_req->url, "%s/objects/info/alternates",
522 base);
523 curl_easy_setopt(slot->curl, CURLOPT_URL,
524 alt_req->url);
525 active_requests++;
526 slot->in_use = 1;
527 if (slot->finished != NULL)
528 (*slot->finished) = 0;
529 if (!start_active_slot(slot)) {
530 got_alternates = -1;
531 slot->in_use = 0;
532 if (slot->finished != NULL)
533 (*slot->finished) = 1;
535 return;
537 } else if (slot->curl_result != CURLE_OK) {
538 if (slot->http_code != 404 &&
539 slot->curl_result != CURLE_FILE_COULDNT_READ_FILE) {
540 got_alternates = -1;
541 return;
545 fwrite_buffer(&null_byte, 1, 1, alt_req->buffer);
546 alt_req->buffer->posn--;
547 data = alt_req->buffer->buffer;
549 while (i < alt_req->buffer->posn) {
550 int posn = i;
551 while (posn < alt_req->buffer->posn && data[posn] != '\n')
552 posn++;
553 if (data[posn] == '\n') {
554 int okay = 0;
555 int serverlen = 0;
556 struct alt_base *newalt;
557 char *target = NULL;
558 char *path;
559 if (data[i] == '/') {
560 serverlen = strchr(base + 8, '/') - base;
561 okay = 1;
562 } else if (!memcmp(data + i, "../", 3)) {
563 i += 3;
564 serverlen = strlen(base);
565 while (i + 2 < posn &&
566 !memcmp(data + i, "../", 3)) {
567 do {
568 serverlen--;
569 } while (serverlen &&
570 base[serverlen - 1] != '/');
571 i += 3;
573 // If the server got removed, give up.
574 okay = strchr(base, ':') - base + 3 <
575 serverlen;
576 } else if (alt_req->http_specific) {
577 char *colon = strchr(data + i, ':');
578 char *slash = strchr(data + i, '/');
579 if (colon && slash && colon < data + posn &&
580 slash < data + posn && colon < slash) {
581 okay = 1;
584 // skip 'objects' at end
585 if (okay) {
586 target = xmalloc(serverlen + posn - i - 6);
587 safe_strncpy(target, base, serverlen);
588 safe_strncpy(target + serverlen, data + i, posn - i - 6);
589 if (get_verbosely)
590 fprintf(stderr,
591 "Also look at %s\n", target);
592 newalt = xmalloc(sizeof(*newalt));
593 newalt->next = NULL;
594 newalt->base = target;
595 newalt->got_indices = 0;
596 newalt->packs = NULL;
597 path = strstr(target, "//");
598 if (path) {
599 path = strchr(path+2, '/');
600 if (path)
601 newalt->path_len = strlen(path);
604 while (tail->next != NULL)
605 tail = tail->next;
606 tail->next = newalt;
609 i = posn + 1;
612 got_alternates = 1;
615 static void fetch_alternates(char *base)
617 struct buffer buffer;
618 char *url;
619 char *data;
620 struct active_request_slot *slot;
621 struct alternates_request alt_req;
623 /* If another request has already started fetching alternates,
624 wait for them to arrive and return to processing this request's
625 curl message */
626 #ifdef USE_CURL_MULTI
627 while (got_alternates == 0) {
628 step_active_slots();
630 #endif
632 /* Nothing to do if they've already been fetched */
633 if (got_alternates == 1)
634 return;
636 /* Start the fetch */
637 got_alternates = 0;
639 data = xmalloc(4096);
640 buffer.size = 4096;
641 buffer.posn = 0;
642 buffer.buffer = data;
644 if (get_verbosely)
645 fprintf(stderr, "Getting alternates list for %s\n", base);
647 url = xmalloc(strlen(base) + 31);
648 sprintf(url, "%s/objects/info/http-alternates", base);
650 /* Use a callback to process the result, since another request
651 may fail and need to have alternates loaded before continuing */
652 slot = get_active_slot();
653 slot->callback_func = process_alternates_response;
654 slot->callback_data = &alt_req;
656 curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
657 curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
658 curl_easy_setopt(slot->curl, CURLOPT_URL, url);
660 alt_req.base = base;
661 alt_req.url = url;
662 alt_req.buffer = &buffer;
663 alt_req.http_specific = 1;
664 alt_req.slot = slot;
666 if (start_active_slot(slot))
667 run_active_slot(slot);
668 else
669 got_alternates = -1;
671 free(data);
672 free(url);
675 #ifndef NO_EXPAT
676 static void
677 xml_start_tag(void *userData, const char *name, const char **atts)
679 struct xml_ctx *ctx = (struct xml_ctx *)userData;
680 const char *c = strchr(name, ':');
681 int new_len;
683 if (c == NULL)
684 c = name;
685 else
686 c++;
688 new_len = strlen(ctx->name) + strlen(c) + 2;
690 if (new_len > ctx->len) {
691 ctx->name = xrealloc(ctx->name, new_len);
692 ctx->len = new_len;
694 strcat(ctx->name, ".");
695 strcat(ctx->name, c);
697 if (ctx->cdata) {
698 free(ctx->cdata);
699 ctx->cdata = NULL;
702 ctx->userFunc(ctx, 0);
705 static void
706 xml_end_tag(void *userData, const char *name)
708 struct xml_ctx *ctx = (struct xml_ctx *)userData;
709 const char *c = strchr(name, ':');
710 char *ep;
712 ctx->userFunc(ctx, 1);
714 if (c == NULL)
715 c = name;
716 else
717 c++;
719 ep = ctx->name + strlen(ctx->name) - strlen(c) - 1;
720 *ep = 0;
723 static void
724 xml_cdata(void *userData, const XML_Char *s, int len)
726 struct xml_ctx *ctx = (struct xml_ctx *)userData;
727 if (ctx->cdata)
728 free(ctx->cdata);
729 ctx->cdata = xmalloc(len + 1);
730 safe_strncpy(ctx->cdata, s, len + 1);
/* Declared early because handle_remote_ls_ctx() recurses into it */
static int remote_ls(struct alt_base *repo, const char *path, int flags,
		     void (*userFunc)(struct remote_ls_ctx *ls),
		     void *userData);
737 static void handle_remote_ls_ctx(struct xml_ctx *ctx, int tag_closed)
739 struct remote_ls_ctx *ls = (struct remote_ls_ctx *)ctx->userData;
741 if (tag_closed) {
742 if (!strcmp(ctx->name, DAV_PROPFIND_RESP) && ls->dentry_name) {
743 if (ls->dentry_flags & IS_DIR) {
744 if (ls->flags & PROCESS_DIRS) {
745 ls->userFunc(ls);
747 if (strcmp(ls->dentry_name, ls->path) &&
748 ls->flags & RECURSIVE) {
749 ls->rc = remote_ls(ls->repo,
750 ls->dentry_name,
751 ls->flags,
752 ls->userFunc,
753 ls->userData);
755 } else if (ls->flags & PROCESS_FILES) {
756 ls->userFunc(ls);
758 } else if (!strcmp(ctx->name, DAV_PROPFIND_NAME) && ctx->cdata) {
759 ls->dentry_name = xmalloc(strlen(ctx->cdata) -
760 ls->repo->path_len + 1);
761 strcpy(ls->dentry_name, ctx->cdata + ls->repo->path_len);
762 } else if (!strcmp(ctx->name, DAV_PROPFIND_COLLECTION)) {
763 ls->dentry_flags |= IS_DIR;
765 } else if (!strcmp(ctx->name, DAV_PROPFIND_RESP)) {
766 if (ls->dentry_name) {
767 free(ls->dentry_name);
769 ls->dentry_name = NULL;
770 ls->dentry_flags = 0;
774 static int remote_ls(struct alt_base *repo, const char *path, int flags,
775 void (*userFunc)(struct remote_ls_ctx *ls),
776 void *userData)
778 char *url = xmalloc(strlen(repo->base) + strlen(path) + 1);
779 struct active_request_slot *slot;
780 struct slot_results results;
781 struct buffer in_buffer;
782 struct buffer out_buffer;
783 char *in_data;
784 char *out_data;
785 XML_Parser parser = XML_ParserCreate(NULL);
786 enum XML_Status result;
787 struct curl_slist *dav_headers = NULL;
788 struct xml_ctx ctx;
789 struct remote_ls_ctx ls;
791 ls.flags = flags;
792 ls.repo = repo;
793 ls.path = strdup(path);
794 ls.dentry_name = NULL;
795 ls.dentry_flags = 0;
796 ls.userData = userData;
797 ls.userFunc = userFunc;
798 ls.rc = 0;
800 sprintf(url, "%s%s", repo->base, path);
802 out_buffer.size = strlen(PROPFIND_ALL_REQUEST);
803 out_data = xmalloc(out_buffer.size + 1);
804 snprintf(out_data, out_buffer.size + 1, PROPFIND_ALL_REQUEST);
805 out_buffer.posn = 0;
806 out_buffer.buffer = out_data;
808 in_buffer.size = 4096;
809 in_data = xmalloc(in_buffer.size);
810 in_buffer.posn = 0;
811 in_buffer.buffer = in_data;
813 dav_headers = curl_slist_append(dav_headers, "Depth: 1");
814 dav_headers = curl_slist_append(dav_headers, "Content-Type: text/xml");
816 slot = get_active_slot();
817 slot->results = &results;
818 curl_easy_setopt(slot->curl, CURLOPT_INFILE, &out_buffer);
819 curl_easy_setopt(slot->curl, CURLOPT_INFILESIZE, out_buffer.size);
820 curl_easy_setopt(slot->curl, CURLOPT_READFUNCTION, fread_buffer);
821 curl_easy_setopt(slot->curl, CURLOPT_FILE, &in_buffer);
822 curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
823 curl_easy_setopt(slot->curl, CURLOPT_URL, url);
824 curl_easy_setopt(slot->curl, CURLOPT_UPLOAD, 1);
825 curl_easy_setopt(slot->curl, CURLOPT_CUSTOMREQUEST, DAV_PROPFIND);
826 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, dav_headers);
828 if (start_active_slot(slot)) {
829 run_active_slot(slot);
830 if (results.curl_result == CURLE_OK) {
831 ctx.name = xcalloc(10, 1);
832 ctx.len = 0;
833 ctx.cdata = NULL;
834 ctx.userFunc = handle_remote_ls_ctx;
835 ctx.userData = &ls;
836 XML_SetUserData(parser, &ctx);
837 XML_SetElementHandler(parser, xml_start_tag,
838 xml_end_tag);
839 XML_SetCharacterDataHandler(parser, xml_cdata);
840 result = XML_Parse(parser, in_buffer.buffer,
841 in_buffer.posn, 1);
842 free(ctx.name);
844 if (result != XML_STATUS_OK) {
845 ls.rc = error("XML error: %s",
846 XML_ErrorString(
847 XML_GetErrorCode(parser)));
849 } else {
850 ls.rc = -1;
852 } else {
853 ls.rc = error("Unable to start PROPFIND request");
856 free(ls.path);
857 free(url);
858 free(out_data);
859 free(in_buffer.buffer);
860 curl_slist_free_all(dav_headers);
862 return ls.rc;
865 static void process_ls_pack(struct remote_ls_ctx *ls)
867 unsigned char sha1[20];
869 if (strlen(ls->dentry_name) == 63 &&
870 !strncmp(ls->dentry_name, "objects/pack/pack-", 18) &&
871 !strncmp(ls->dentry_name+58, ".pack", 5)) {
872 get_sha1_hex(ls->dentry_name + 18, sha1);
873 setup_index(ls->repo, sha1);
876 #endif
878 static int fetch_indices(struct alt_base *repo)
880 unsigned char sha1[20];
881 char *url;
882 struct buffer buffer;
883 char *data;
884 int i = 0;
886 struct active_request_slot *slot;
887 struct slot_results results;
889 if (repo->got_indices)
890 return 0;
892 data = xmalloc(4096);
893 buffer.size = 4096;
894 buffer.posn = 0;
895 buffer.buffer = data;
897 if (get_verbosely)
898 fprintf(stderr, "Getting pack list for %s\n", repo->base);
900 #ifndef NO_EXPAT
901 if (remote_ls(repo, "objects/pack/", PROCESS_FILES,
902 process_ls_pack, NULL) == 0)
903 return 0;
904 #endif
906 url = xmalloc(strlen(repo->base) + 21);
907 sprintf(url, "%s/objects/info/packs", repo->base);
909 slot = get_active_slot();
910 slot->results = &results;
911 curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
912 curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
913 curl_easy_setopt(slot->curl, CURLOPT_URL, url);
914 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, NULL);
915 if (start_active_slot(slot)) {
916 run_active_slot(slot);
917 if (results.curl_result != CURLE_OK) {
918 if (results.http_code == 404 ||
919 results.curl_result == CURLE_FILE_COULDNT_READ_FILE) {
920 repo->got_indices = 1;
921 free(buffer.buffer);
922 return 0;
923 } else {
924 repo->got_indices = 0;
925 free(buffer.buffer);
926 return error("%s", curl_errorstr);
929 } else {
930 repo->got_indices = 0;
931 free(buffer.buffer);
932 return error("Unable to start request");
935 data = buffer.buffer;
936 while (i < buffer.posn) {
937 switch (data[i]) {
938 case 'P':
939 i++;
940 if (i + 52 <= buffer.posn &&
941 !strncmp(data + i, " pack-", 6) &&
942 !strncmp(data + i + 46, ".pack\n", 6)) {
943 get_sha1_hex(data + i + 6, sha1);
944 setup_index(repo, sha1);
945 i += 51;
946 break;
948 default:
949 while (i < buffer.posn && data[i] != '\n')
950 i++;
952 i++;
955 free(buffer.buffer);
956 repo->got_indices = 1;
957 return 0;
960 static int fetch_pack(struct alt_base *repo, unsigned char *sha1)
962 char *url;
963 struct packed_git *target;
964 struct packed_git **lst;
965 FILE *packfile;
966 char *filename;
967 char tmpfile[PATH_MAX];
968 int ret;
969 long prev_posn = 0;
970 char range[RANGE_HEADER_SIZE];
971 struct curl_slist *range_header = NULL;
973 struct active_request_slot *slot;
974 struct slot_results results;
976 if (fetch_indices(repo))
977 return -1;
978 target = find_sha1_pack(sha1, repo->packs);
979 if (!target)
980 return -1;
982 if (get_verbosely) {
983 fprintf(stderr, "Getting pack %s\n",
984 sha1_to_hex(target->sha1));
985 fprintf(stderr, " which contains %s\n",
986 sha1_to_hex(sha1));
989 url = xmalloc(strlen(repo->base) + 65);
990 sprintf(url, "%s/objects/pack/pack-%s.pack",
991 repo->base, sha1_to_hex(target->sha1));
993 filename = sha1_pack_name(target->sha1);
994 snprintf(tmpfile, sizeof(tmpfile), "%s.temp", filename);
995 packfile = fopen(tmpfile, "a");
996 if (!packfile)
997 return error("Unable to open local file %s for pack",
998 filename);
1000 slot = get_active_slot();
1001 slot->results = &results;
1002 curl_easy_setopt(slot->curl, CURLOPT_FILE, packfile);
1003 curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite);
1004 curl_easy_setopt(slot->curl, CURLOPT_URL, url);
1005 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, no_pragma_header);
1006 slot->local = packfile;
1008 /* If there is data present from a previous transfer attempt,
1009 resume where it left off */
1010 prev_posn = ftell(packfile);
1011 if (prev_posn>0) {
1012 if (get_verbosely)
1013 fprintf(stderr,
1014 "Resuming fetch of pack %s at byte %ld\n",
1015 sha1_to_hex(target->sha1), prev_posn);
1016 sprintf(range, "Range: bytes=%ld-", prev_posn);
1017 range_header = curl_slist_append(range_header, range);
1018 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, range_header);
1021 if (start_active_slot(slot)) {
1022 run_active_slot(slot);
1023 if (results.curl_result != CURLE_OK) {
1024 fclose(packfile);
1025 return error("Unable to get pack file %s\n%s", url,
1026 curl_errorstr);
1028 } else {
1029 fclose(packfile);
1030 return error("Unable to start request");
1033 fclose(packfile);
1035 ret = move_temp_to_file(tmpfile, filename);
1036 if (ret)
1037 return ret;
1039 lst = &repo->packs;
1040 while (*lst != target)
1041 lst = &((*lst)->next);
1042 *lst = (*lst)->next;
1044 if (verify_pack(target, 0))
1045 return -1;
1046 install_packed_git(target);
1048 return 0;
1051 static void abort_object_request(struct object_request *obj_req)
1053 if (obj_req->local >= 0) {
1054 close(obj_req->local);
1055 obj_req->local = -1;
1057 unlink(obj_req->tmpfile);
1058 if (obj_req->slot) {
1059 release_active_slot(obj_req->slot);
1060 obj_req->slot = NULL;
1062 release_object_request(obj_req);
1065 static int fetch_object(struct alt_base *repo, unsigned char *sha1)
1067 char *hex = sha1_to_hex(sha1);
1068 int ret = 0;
1069 struct object_request *obj_req = object_queue_head;
1071 while (obj_req != NULL && memcmp(obj_req->sha1, sha1, 20))
1072 obj_req = obj_req->next;
1073 if (obj_req == NULL)
1074 return error("Couldn't find request for %s in the queue", hex);
1076 if (has_sha1_file(obj_req->sha1)) {
1077 abort_object_request(obj_req);
1078 return 0;
1081 #ifdef USE_CURL_MULTI
1082 while (obj_req->state == WAITING) {
1083 step_active_slots();
1085 #else
1086 start_object_request(obj_req);
1087 #endif
1089 while (obj_req->state == ACTIVE) {
1090 run_active_slot(obj_req->slot);
1092 if (obj_req->local != -1) {
1093 close(obj_req->local); obj_req->local = -1;
1096 if (obj_req->state == ABORTED) {
1097 ret = error("Request for %s aborted", hex);
1098 } else if (obj_req->curl_result != CURLE_OK &&
1099 obj_req->http_code != 416) {
1100 if (obj_req->http_code == 404 ||
1101 obj_req->curl_result == CURLE_FILE_COULDNT_READ_FILE)
1102 ret = -1; /* Be silent, it is probably in a pack. */
1103 else
1104 ret = error("%s (curl_result = %d, http_code = %ld, sha1 = %s)",
1105 obj_req->errorstr, obj_req->curl_result,
1106 obj_req->http_code, hex);
1107 } else if (obj_req->zret != Z_STREAM_END) {
1108 corrupt_object_found++;
1109 ret = error("File %s (%s) corrupt", hex, obj_req->url);
1110 } else if (memcmp(obj_req->sha1, obj_req->real_sha1, 20)) {
1111 ret = error("File %s has bad hash", hex);
1112 } else if (obj_req->rename < 0) {
1113 ret = error("unable to write sha1 filename %s",
1114 obj_req->filename);
1117 release_object_request(obj_req);
1118 return ret;
1121 int fetch(unsigned char *sha1)
1123 struct alt_base *altbase = alt;
1125 if (!fetch_object(altbase, sha1))
1126 return 0;
1127 while (altbase) {
1128 if (!fetch_pack(altbase, sha1))
1129 return 0;
1130 fetch_alternates(alt->base);
1131 altbase = altbase->next;
1133 return error("Unable to find %s under %s", sha1_to_hex(sha1),
1134 alt->base);
/*
 * Return 0 when ch may appear in a URL as is (ASCII letter, digit, '/',
 * '-' or '.'), 1 when it has to be percent-encoded.
 */
static inline int needs_quote(int ch)
{
	int is_upper = ch >= 'A' && ch <= 'Z';
	int is_lower = ch >= 'a' && ch <= 'z';
	int is_digit = ch >= '0' && ch <= '9';
	int is_plain = ch == '/' || ch == '-' || ch == '.';

	return !(is_upper || is_lower || is_digit || is_plain);
}
/* Map a value 0..15 to its upper-case hexadecimal digit character */
static inline int hex(int v)
{
	return v < 10 ? '0' + v : 'A' + v - 10;
}
/*
 * Build the URL of a ref: base + "refs/" + ref, with every character of
 * ref that needs_quote() rejects replaced by "%XX".
 *
 * Returns a newly allocated string; the caller owns it.
 */
static char *quote_ref_url(const char *base, const char *ref)
{
	const char *src;
	char *out, *quoted;
	int size, baselen, ch;

	baselen = strlen(base);

	/* first pass: base + "refs/" + NUL, plus 1 or 3 bytes per character */
	size = baselen + 6;
	for (src = ref; (ch = *src) != 0; src++)
		size += needs_quote(ch) ? 3 : 1;

	quoted = xmalloc(size);
	memcpy(quoted, base, baselen);
	out = quoted + baselen;
	memcpy(out, "refs/", 5);
	out += 5;

	/* second pass: copy, escaping where required */
	for (src = ref; (ch = *src) != 0; src++) {
		if (!needs_quote(ch)) {
			*out++ = ch;
			continue;
		}
		*out++ = '%';
		*out++ = hex((ch >> 4) & 0xF);
		*out++ = hex(ch & 0xF);
	}
	*out = 0;

	return quoted;
}
1183 int fetch_ref(char *ref, unsigned char *sha1)
1185 char *url;
1186 char hex[42];
1187 struct buffer buffer;
1188 char *base = alt->base;
1189 struct active_request_slot *slot;
1190 struct slot_results results;
1191 buffer.size = 41;
1192 buffer.posn = 0;
1193 buffer.buffer = hex;
1194 hex[41] = '\0';
1196 url = quote_ref_url(base, ref);
1197 slot = get_active_slot();
1198 slot->results = &results;
1199 curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
1200 curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
1201 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, NULL);
1202 curl_easy_setopt(slot->curl, CURLOPT_URL, url);
1203 if (start_active_slot(slot)) {
1204 run_active_slot(slot);
1205 if (results.curl_result != CURLE_OK)
1206 return error("Couldn't get %s for %s\n%s",
1207 url, ref, curl_errorstr);
1208 } else {
1209 return error("Unable to start request");
1212 hex[40] = '\0';
1213 get_sha1_hex(hex, sha1);
1214 return 0;
1217 int main(int argc, char **argv)
1219 char *commit_id;
1220 char *url;
1221 char *path;
1222 int arg = 1;
1223 int rc = 0;
1225 setup_git_directory();
1226 git_config(git_default_config);
1228 while (arg < argc && argv[arg][0] == '-') {
1229 if (argv[arg][1] == 't') {
1230 get_tree = 1;
1231 } else if (argv[arg][1] == 'c') {
1232 get_history = 1;
1233 } else if (argv[arg][1] == 'a') {
1234 get_all = 1;
1235 get_tree = 1;
1236 get_history = 1;
1237 } else if (argv[arg][1] == 'v') {
1238 get_verbosely = 1;
1239 } else if (argv[arg][1] == 'w') {
1240 write_ref = argv[arg + 1];
1241 arg++;
1242 } else if (!strcmp(argv[arg], "--recover")) {
1243 get_recover = 1;
1245 arg++;
1247 if (argc < arg + 2) {
1248 usage("git-http-fetch [-c] [-t] [-a] [-d] [-v] [--recover] [-w ref] commit-id url");
1249 return 1;
1251 commit_id = argv[arg];
1252 url = argv[arg + 1];
1253 write_ref_log_details = url;
1255 http_init();
1257 no_pragma_header = curl_slist_append(no_pragma_header, "Pragma:");
1259 alt = xmalloc(sizeof(*alt));
1260 alt->base = url;
1261 alt->got_indices = 0;
1262 alt->packs = NULL;
1263 alt->next = NULL;
1264 path = strstr(url, "//");
1265 if (path) {
1266 path = strchr(path+2, '/');
1267 if (path)
1268 alt->path_len = strlen(path);
1271 if (pull(commit_id))
1272 rc = 1;
1274 http_cleanup();
1276 curl_slist_free_all(no_pragma_header);
1278 if (corrupt_object_found) {
1279 fprintf(stderr,
1280 "Some loose object were found to be corrupt, but they might be just\n"
1281 "a false '404 Not Found' error message sent with incorrect HTTP\n"
1282 "status code. Suggest running git fsck-objects.\n");
1284 return rc;