Rename builtin-zip-tree.c to archive-zip.c
[git/wpalmer.git] / http-fetch.c
blobbc74f30f76fe0200f6c7ecc215ee1cd9211670f4
1 #include "cache.h"
2 #include "commit.h"
3 #include "pack.h"
4 #include "fetch.h"
5 #include "http.h"
7 #ifndef NO_EXPAT
8 #include <expat.h>
10 /* Definitions for DAV requests */
11 #define DAV_PROPFIND "PROPFIND"
12 #define DAV_PROPFIND_RESP ".multistatus.response"
13 #define DAV_PROPFIND_NAME ".multistatus.response.href"
14 #define DAV_PROPFIND_COLLECTION ".multistatus.response.propstat.prop.resourcetype.collection"
15 #define PROPFIND_ALL_REQUEST "<?xml version=\"1.0\" encoding=\"utf-8\" ?>\n<D:propfind xmlns:D=\"DAV:\">\n<D:allprop/>\n</D:propfind>"
17 /* Definitions for processing XML DAV responses */
18 #ifndef XML_STATUS_OK
19 enum XML_Status {
20 XML_STATUS_OK = 1,
21 XML_STATUS_ERROR = 0
23 #define XML_STATUS_OK 1
24 #define XML_STATUS_ERROR 0
25 #endif
27 /* Flags that control remote_ls processing */
28 #define PROCESS_FILES (1u << 0)
29 #define PROCESS_DIRS (1u << 1)
30 #define RECURSIVE (1u << 2)
32 /* Flags that remote_ls passes to callback functions */
33 #define IS_DIR (1u << 0)
34 #endif
36 #define PREV_BUF_SIZE 4096
37 #define RANGE_HEADER_SIZE 30
39 static int commits_on_stdin;
41 static int got_alternates = -1;
42 static int corrupt_object_found;
44 static struct curl_slist *no_pragma_header;
/*
 * One repository that objects may be fetched from.  The entries form a
 * singly linked list headed by `alt`; alternates are appended at the tail.
 */
struct alt_base
{
	const char *base;		/* URL prefix of the repository */
	int path_len;			/* strlen() of the path part of base, i.e.
					 * from the first '/' after "//" */
	int got_indices;		/* non-zero once the pack list was processed */
	struct packed_git *packs;	/* packs known to exist in this repository */
	struct alt_base *next;
};

/* Head of the repository list; the first entry is the URL given to main(). */
static struct alt_base *alt;
/* Lifecycle of an object_request as driven by the functions in this file. */
enum object_request_state {
	WAITING,	/* queued by prefetch(), transfer not started yet */
	ABORTED,	/* could not be started (temp file or slot failure) */
	ACTIVE,		/* transfer handed to an active request slot */
	COMPLETE,	/* response processed, or object already present */
};
64 struct object_request
66 unsigned char sha1[20];
67 struct alt_base *repo;
68 char *url;
69 char filename[PATH_MAX];
70 char tmpfile[PATH_MAX];
71 int local;
72 enum object_request_state state;
73 CURLcode curl_result;
74 char errorstr[CURL_ERROR_SIZE];
75 long http_code;
76 unsigned char real_sha1[20];
77 SHA_CTX c;
78 z_stream stream;
79 int zret;
80 int rename;
81 struct active_request_slot *slot;
82 struct object_request *next;
/* Context handed to process_alternates_response() via the slot callback. */
struct alternates_request {
	const char *base;		/* repository URL the list belongs to */
	char *url;			/* request URL; rewritten in place for the
					 * fallback to objects/info/alternates */
	struct buffer *buffer;		/* receives the response body */
	struct active_request_slot *slot;
	int http_specific;		/* 1 while fetching http-alternates */
};
#ifndef NO_EXPAT
/* Parser state shared by the expat callbacks in this file. */
struct xml_ctx
{
	char *name;	/* dotted path of the currently open elements */
	int len;	/* allocated size of name as tracked by xml_start_tag() */
	char *cdata;	/* most recent character data, or NULL */
	void (*userFunc)(struct xml_ctx *ctx, int tag_closed);
	void *userData;
};

/* State of one remote_ls() listing. */
struct remote_ls_ctx
{
	struct alt_base *repo;
	char *path;			/* path that is being listed */
	void (*userFunc)(struct remote_ls_ctx *ls);
	void *userData;
	int flags;			/* PROCESS_FILES | PROCESS_DIRS | RECURSIVE */
	char *dentry_name;		/* name of the entry being reported */
	int dentry_flags;		/* IS_DIR when the entry is a collection */
	int rc;				/* result returned by remote_ls() */
	struct remote_ls_ctx *parent;
};
#endif

/* Head of the queue of object requests; prefetch() appends at the tail. */
static struct object_request *object_queue_head;
119 static size_t fwrite_sha1_file(void *ptr, size_t eltsize, size_t nmemb,
120 void *data)
122 unsigned char expn[4096];
123 size_t size = eltsize * nmemb;
124 int posn = 0;
125 struct object_request *obj_req = (struct object_request *)data;
126 do {
127 ssize_t retval = write(obj_req->local,
128 (char *) ptr + posn, size - posn);
129 if (retval < 0)
130 return posn;
131 posn += retval;
132 } while (posn < size);
134 obj_req->stream.avail_in = size;
135 obj_req->stream.next_in = ptr;
136 do {
137 obj_req->stream.next_out = expn;
138 obj_req->stream.avail_out = sizeof(expn);
139 obj_req->zret = inflate(&obj_req->stream, Z_SYNC_FLUSH);
140 SHA1_Update(&obj_req->c, expn,
141 sizeof(expn) - obj_req->stream.avail_out);
142 } while (obj_req->stream.avail_in && obj_req->zret == Z_OK);
143 data_received++;
144 return size;
147 static int missing__target(int code, int result)
149 return /* file:// URL -- do we ever use one??? */
150 (result == CURLE_FILE_COULDNT_READ_FILE) ||
151 /* http:// and https:// URL */
152 (code == 404 && result == CURLE_HTTP_RETURNED_ERROR) ||
153 /* ftp:// URL */
154 (code == 550 && result == CURLE_FTP_COULDNT_RETR_FILE)
158 #define missing_target(a) missing__target((a)->http_code, (a)->curl_result)
160 static void fetch_alternates(const char *base);
162 static void process_object_response(void *callback_data);
164 static void start_object_request(struct object_request *obj_req)
166 char *hex = sha1_to_hex(obj_req->sha1);
167 char prevfile[PATH_MAX];
168 char *url;
169 char *posn;
170 int prevlocal;
171 unsigned char prev_buf[PREV_BUF_SIZE];
172 ssize_t prev_read = 0;
173 long prev_posn = 0;
174 char range[RANGE_HEADER_SIZE];
175 struct curl_slist *range_header = NULL;
176 struct active_request_slot *slot;
178 snprintf(prevfile, sizeof(prevfile), "%s.prev", obj_req->filename);
179 unlink(prevfile);
180 rename(obj_req->tmpfile, prevfile);
181 unlink(obj_req->tmpfile);
183 if (obj_req->local != -1)
184 error("fd leakage in start: %d", obj_req->local);
185 obj_req->local = open(obj_req->tmpfile,
186 O_WRONLY | O_CREAT | O_EXCL, 0666);
187 /* This could have failed due to the "lazy directory creation";
188 * try to mkdir the last path component.
190 if (obj_req->local < 0 && errno == ENOENT) {
191 char *dir = strrchr(obj_req->tmpfile, '/');
192 if (dir) {
193 *dir = 0;
194 mkdir(obj_req->tmpfile, 0777);
195 *dir = '/';
197 obj_req->local = open(obj_req->tmpfile,
198 O_WRONLY | O_CREAT | O_EXCL, 0666);
201 if (obj_req->local < 0) {
202 obj_req->state = ABORTED;
203 error("Couldn't create temporary file %s for %s: %s",
204 obj_req->tmpfile, obj_req->filename, strerror(errno));
205 return;
208 memset(&obj_req->stream, 0, sizeof(obj_req->stream));
210 inflateInit(&obj_req->stream);
212 SHA1_Init(&obj_req->c);
214 url = xmalloc(strlen(obj_req->repo->base) + 50);
215 obj_req->url = xmalloc(strlen(obj_req->repo->base) + 50);
216 strcpy(url, obj_req->repo->base);
217 posn = url + strlen(obj_req->repo->base);
218 strcpy(posn, "objects/");
219 posn += 8;
220 memcpy(posn, hex, 2);
221 posn += 2;
222 *(posn++) = '/';
223 strcpy(posn, hex + 2);
224 strcpy(obj_req->url, url);
226 /* If a previous temp file is present, process what was already
227 fetched. */
228 prevlocal = open(prevfile, O_RDONLY);
229 if (prevlocal != -1) {
230 do {
231 prev_read = read(prevlocal, prev_buf, PREV_BUF_SIZE);
232 if (prev_read>0) {
233 if (fwrite_sha1_file(prev_buf,
235 prev_read,
236 obj_req) == prev_read) {
237 prev_posn += prev_read;
238 } else {
239 prev_read = -1;
242 } while (prev_read > 0);
243 close(prevlocal);
245 unlink(prevfile);
247 /* Reset inflate/SHA1 if there was an error reading the previous temp
248 file; also rewind to the beginning of the local file. */
249 if (prev_read == -1) {
250 memset(&obj_req->stream, 0, sizeof(obj_req->stream));
251 inflateInit(&obj_req->stream);
252 SHA1_Init(&obj_req->c);
253 if (prev_posn>0) {
254 prev_posn = 0;
255 lseek(obj_req->local, SEEK_SET, 0);
256 ftruncate(obj_req->local, 0);
260 slot = get_active_slot();
261 slot->callback_func = process_object_response;
262 slot->callback_data = obj_req;
263 obj_req->slot = slot;
265 curl_easy_setopt(slot->curl, CURLOPT_FILE, obj_req);
266 curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_sha1_file);
267 curl_easy_setopt(slot->curl, CURLOPT_ERRORBUFFER, obj_req->errorstr);
268 curl_easy_setopt(slot->curl, CURLOPT_URL, url);
269 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, no_pragma_header);
271 /* If we have successfully processed data from a previous fetch
272 attempt, only fetch the data we don't already have. */
273 if (prev_posn>0) {
274 if (get_verbosely)
275 fprintf(stderr,
276 "Resuming fetch of object %s at byte %ld\n",
277 hex, prev_posn);
278 sprintf(range, "Range: bytes=%ld-", prev_posn);
279 range_header = curl_slist_append(range_header, range);
280 curl_easy_setopt(slot->curl,
281 CURLOPT_HTTPHEADER, range_header);
284 /* Try to get the request started, abort the request on error */
285 obj_req->state = ACTIVE;
286 if (!start_active_slot(slot)) {
287 obj_req->state = ABORTED;
288 obj_req->slot = NULL;
289 close(obj_req->local); obj_req->local = -1;
290 free(obj_req->url);
291 return;
295 static void finish_object_request(struct object_request *obj_req)
297 struct stat st;
299 fchmod(obj_req->local, 0444);
300 close(obj_req->local); obj_req->local = -1;
302 if (obj_req->http_code == 416) {
303 fprintf(stderr, "Warning: requested range invalid; we may already have all the data.\n");
304 } else if (obj_req->curl_result != CURLE_OK) {
305 if (stat(obj_req->tmpfile, &st) == 0)
306 if (st.st_size == 0)
307 unlink(obj_req->tmpfile);
308 return;
311 inflateEnd(&obj_req->stream);
312 SHA1_Final(obj_req->real_sha1, &obj_req->c);
313 if (obj_req->zret != Z_STREAM_END) {
314 unlink(obj_req->tmpfile);
315 return;
317 if (hashcmp(obj_req->sha1, obj_req->real_sha1)) {
318 unlink(obj_req->tmpfile);
319 return;
321 obj_req->rename =
322 move_temp_to_file(obj_req->tmpfile, obj_req->filename);
324 if (obj_req->rename == 0)
325 pull_say("got %s\n", sha1_to_hex(obj_req->sha1));
328 static void process_object_response(void *callback_data)
330 struct object_request *obj_req =
331 (struct object_request *)callback_data;
333 obj_req->curl_result = obj_req->slot->curl_result;
334 obj_req->http_code = obj_req->slot->http_code;
335 obj_req->slot = NULL;
336 obj_req->state = COMPLETE;
338 /* Use alternates if necessary */
339 if (missing_target(obj_req)) {
340 fetch_alternates(alt->base);
341 if (obj_req->repo->next != NULL) {
342 obj_req->repo =
343 obj_req->repo->next;
344 close(obj_req->local);
345 obj_req->local = -1;
346 start_object_request(obj_req);
347 return;
351 finish_object_request(obj_req);
354 static void release_object_request(struct object_request *obj_req)
356 struct object_request *entry = object_queue_head;
358 if (obj_req->local != -1)
359 error("fd leakage in release: %d", obj_req->local);
360 if (obj_req == object_queue_head) {
361 object_queue_head = obj_req->next;
362 } else {
363 while (entry->next != NULL && entry->next != obj_req)
364 entry = entry->next;
365 if (entry->next == obj_req)
366 entry->next = entry->next->next;
369 free(obj_req->url);
370 free(obj_req);
#ifdef USE_CURL_MULTI
/*
 * Start waiting object requests while fewer than max_requests transfers
 * are active, then destroy the curl handles of slots that are not in use.
 */
void fill_active_slots(void)
{
	struct object_request *obj_req = object_queue_head;
	struct active_request_slot *slot = active_queue_head;
	int num_transfers;

	while (active_requests < max_requests && obj_req != NULL) {
		if (obj_req->state == WAITING) {
			/* nothing to download if the object showed up meanwhile */
			if (has_sha1_file(obj_req->sha1))
				obj_req->state = COMPLETE;
			else
				start_object_request(obj_req);
			curl_multi_perform(curlm, &num_transfers);
		}
		obj_req = obj_req->next;
	}

	while (slot != NULL) {
		if (!slot->in_use && slot->curl != NULL) {
			curl_easy_cleanup(slot->curl);
			slot->curl = NULL;
		}
		slot = slot->next;
	}
}
#endif
401 void prefetch(unsigned char *sha1)
403 struct object_request *newreq;
404 struct object_request *tail;
405 char *filename = sha1_file_name(sha1);
407 newreq = xmalloc(sizeof(*newreq));
408 hashcpy(newreq->sha1, sha1);
409 newreq->repo = alt;
410 newreq->url = NULL;
411 newreq->local = -1;
412 newreq->state = WAITING;
413 snprintf(newreq->filename, sizeof(newreq->filename), "%s", filename);
414 snprintf(newreq->tmpfile, sizeof(newreq->tmpfile),
415 "%s.temp", filename);
416 newreq->slot = NULL;
417 newreq->next = NULL;
419 if (object_queue_head == NULL) {
420 object_queue_head = newreq;
421 } else {
422 tail = object_queue_head;
423 while (tail->next != NULL) {
424 tail = tail->next;
426 tail->next = newreq;
429 #ifdef USE_CURL_MULTI
430 fill_active_slots();
431 step_active_slots();
432 #endif
435 static int fetch_index(struct alt_base *repo, unsigned char *sha1)
437 char *hex = sha1_to_hex(sha1);
438 char *filename;
439 char *url;
440 char tmpfile[PATH_MAX];
441 long prev_posn = 0;
442 char range[RANGE_HEADER_SIZE];
443 struct curl_slist *range_header = NULL;
445 FILE *indexfile;
446 struct active_request_slot *slot;
447 struct slot_results results;
449 if (has_pack_index(sha1))
450 return 0;
452 if (get_verbosely)
453 fprintf(stderr, "Getting index for pack %s\n", hex);
455 url = xmalloc(strlen(repo->base) + 64);
456 sprintf(url, "%s/objects/pack/pack-%s.idx", repo->base, hex);
458 filename = sha1_pack_index_name(sha1);
459 snprintf(tmpfile, sizeof(tmpfile), "%s.temp", filename);
460 indexfile = fopen(tmpfile, "a");
461 if (!indexfile)
462 return error("Unable to open local file %s for pack index",
463 filename);
465 slot = get_active_slot();
466 slot->results = &results;
467 curl_easy_setopt(slot->curl, CURLOPT_FILE, indexfile);
468 curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite);
469 curl_easy_setopt(slot->curl, CURLOPT_URL, url);
470 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, no_pragma_header);
471 slot->local = indexfile;
473 /* If there is data present from a previous transfer attempt,
474 resume where it left off */
475 prev_posn = ftell(indexfile);
476 if (prev_posn>0) {
477 if (get_verbosely)
478 fprintf(stderr,
479 "Resuming fetch of index for pack %s at byte %ld\n",
480 hex, prev_posn);
481 sprintf(range, "Range: bytes=%ld-", prev_posn);
482 range_header = curl_slist_append(range_header, range);
483 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, range_header);
486 if (start_active_slot(slot)) {
487 run_active_slot(slot);
488 if (results.curl_result != CURLE_OK) {
489 fclose(indexfile);
490 return error("Unable to get pack index %s\n%s", url,
491 curl_errorstr);
493 } else {
494 fclose(indexfile);
495 return error("Unable to start request");
498 fclose(indexfile);
500 return move_temp_to_file(tmpfile, filename);
503 static int setup_index(struct alt_base *repo, unsigned char *sha1)
505 struct packed_git *new_pack;
506 if (has_pack_file(sha1))
507 return 0; /* don't list this as something we can get */
509 if (fetch_index(repo, sha1))
510 return -1;
512 new_pack = parse_pack_index(sha1);
513 new_pack->next = repo->packs;
514 repo->packs = new_pack;
515 return 0;
518 static void process_alternates_response(void *callback_data)
520 struct alternates_request *alt_req =
521 (struct alternates_request *)callback_data;
522 struct active_request_slot *slot = alt_req->slot;
523 struct alt_base *tail = alt;
524 const char *base = alt_req->base;
525 static const char null_byte = '\0';
526 char *data;
527 int i = 0;
529 if (alt_req->http_specific) {
530 if (slot->curl_result != CURLE_OK ||
531 !alt_req->buffer->posn) {
533 /* Try reusing the slot to get non-http alternates */
534 alt_req->http_specific = 0;
535 sprintf(alt_req->url, "%s/objects/info/alternates",
536 base);
537 curl_easy_setopt(slot->curl, CURLOPT_URL,
538 alt_req->url);
539 active_requests++;
540 slot->in_use = 1;
541 if (slot->finished != NULL)
542 (*slot->finished) = 0;
543 if (!start_active_slot(slot)) {
544 got_alternates = -1;
545 slot->in_use = 0;
546 if (slot->finished != NULL)
547 (*slot->finished) = 1;
549 return;
551 } else if (slot->curl_result != CURLE_OK) {
552 if (!missing_target(slot)) {
553 got_alternates = -1;
554 return;
558 fwrite_buffer(&null_byte, 1, 1, alt_req->buffer);
559 alt_req->buffer->posn--;
560 data = alt_req->buffer->buffer;
562 while (i < alt_req->buffer->posn) {
563 int posn = i;
564 while (posn < alt_req->buffer->posn && data[posn] != '\n')
565 posn++;
566 if (data[posn] == '\n') {
567 int okay = 0;
568 int serverlen = 0;
569 struct alt_base *newalt;
570 char *target = NULL;
571 char *path;
572 if (data[i] == '/') {
573 /* This counts
574 * http://git.host/pub/scm/linux.git/
575 * -----------here^
576 * so memcpy(dst, base, serverlen) will
577 * copy up to "...git.host".
579 const char *colon_ss = strstr(base,"://");
580 if (colon_ss) {
581 serverlen = (strchr(colon_ss + 3, '/')
582 - base);
583 okay = 1;
585 } else if (!memcmp(data + i, "../", 3)) {
586 /* Relative URL; chop the corresponding
587 * number of subpath from base (and ../
588 * from data), and concatenate the result.
590 * The code first drops ../ from data, and
591 * then drops one ../ from data and one path
592 * from base. IOW, one extra ../ is dropped
593 * from data than path is dropped from base.
595 * This is not wrong. The alternate in
596 * http://git.host/pub/scm/linux.git/
597 * to borrow from
598 * http://git.host/pub/scm/linus.git/
599 * is ../../linus.git/objects/. You need
600 * two ../../ to borrow from your direct
601 * neighbour.
603 i += 3;
604 serverlen = strlen(base);
605 while (i + 2 < posn &&
606 !memcmp(data + i, "../", 3)) {
607 do {
608 serverlen--;
609 } while (serverlen &&
610 base[serverlen - 1] != '/');
611 i += 3;
613 /* If the server got removed, give up. */
614 okay = strchr(base, ':') - base + 3 <
615 serverlen;
616 } else if (alt_req->http_specific) {
617 char *colon = strchr(data + i, ':');
618 char *slash = strchr(data + i, '/');
619 if (colon && slash && colon < data + posn &&
620 slash < data + posn && colon < slash) {
621 okay = 1;
624 /* skip "objects\n" at end */
625 if (okay) {
626 target = xmalloc(serverlen + posn - i - 6);
627 memcpy(target, base, serverlen);
628 memcpy(target + serverlen, data + i,
629 posn - i - 7);
630 target[serverlen + posn - i - 7] = 0;
631 if (get_verbosely)
632 fprintf(stderr,
633 "Also look at %s\n", target);
634 newalt = xmalloc(sizeof(*newalt));
635 newalt->next = NULL;
636 newalt->base = target;
637 newalt->got_indices = 0;
638 newalt->packs = NULL;
639 path = strstr(target, "//");
640 if (path) {
641 path = strchr(path+2, '/');
642 if (path)
643 newalt->path_len = strlen(path);
646 while (tail->next != NULL)
647 tail = tail->next;
648 tail->next = newalt;
651 i = posn + 1;
654 got_alternates = 1;
657 static void fetch_alternates(const char *base)
659 struct buffer buffer;
660 char *url;
661 char *data;
662 struct active_request_slot *slot;
663 struct alternates_request alt_req;
665 /* If another request has already started fetching alternates,
666 wait for them to arrive and return to processing this request's
667 curl message */
668 #ifdef USE_CURL_MULTI
669 while (got_alternates == 0) {
670 step_active_slots();
672 #endif
674 /* Nothing to do if they've already been fetched */
675 if (got_alternates == 1)
676 return;
678 /* Start the fetch */
679 got_alternates = 0;
681 data = xmalloc(4096);
682 buffer.size = 4096;
683 buffer.posn = 0;
684 buffer.buffer = data;
686 if (get_verbosely)
687 fprintf(stderr, "Getting alternates list for %s\n", base);
689 url = xmalloc(strlen(base) + 31);
690 sprintf(url, "%s/objects/info/http-alternates", base);
692 /* Use a callback to process the result, since another request
693 may fail and need to have alternates loaded before continuing */
694 slot = get_active_slot();
695 slot->callback_func = process_alternates_response;
696 slot->callback_data = &alt_req;
698 curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
699 curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
700 curl_easy_setopt(slot->curl, CURLOPT_URL, url);
702 alt_req.base = base;
703 alt_req.url = url;
704 alt_req.buffer = &buffer;
705 alt_req.http_specific = 1;
706 alt_req.slot = slot;
708 if (start_active_slot(slot))
709 run_active_slot(slot);
710 else
711 got_alternates = -1;
713 free(data);
714 free(url);
717 #ifndef NO_EXPAT
718 static void
719 xml_start_tag(void *userData, const char *name, const char **atts)
721 struct xml_ctx *ctx = (struct xml_ctx *)userData;
722 const char *c = strchr(name, ':');
723 int new_len;
725 if (c == NULL)
726 c = name;
727 else
728 c++;
730 new_len = strlen(ctx->name) + strlen(c) + 2;
732 if (new_len > ctx->len) {
733 ctx->name = xrealloc(ctx->name, new_len);
734 ctx->len = new_len;
736 strcat(ctx->name, ".");
737 strcat(ctx->name, c);
739 free(ctx->cdata);
740 ctx->cdata = NULL;
742 ctx->userFunc(ctx, 0);
745 static void
746 xml_end_tag(void *userData, const char *name)
748 struct xml_ctx *ctx = (struct xml_ctx *)userData;
749 const char *c = strchr(name, ':');
750 char *ep;
752 ctx->userFunc(ctx, 1);
754 if (c == NULL)
755 c = name;
756 else
757 c++;
759 ep = ctx->name + strlen(ctx->name) - strlen(c) - 1;
760 *ep = 0;
763 static void
764 xml_cdata(void *userData, const XML_Char *s, int len)
766 struct xml_ctx *ctx = (struct xml_ctx *)userData;
767 free(ctx->cdata);
768 ctx->cdata = xmalloc(len + 1);
769 strlcpy(ctx->cdata, s, len + 1);
772 static int remote_ls(struct alt_base *repo, const char *path, int flags,
773 void (*userFunc)(struct remote_ls_ctx *ls),
774 void *userData);
776 static void handle_remote_ls_ctx(struct xml_ctx *ctx, int tag_closed)
778 struct remote_ls_ctx *ls = (struct remote_ls_ctx *)ctx->userData;
780 if (tag_closed) {
781 if (!strcmp(ctx->name, DAV_PROPFIND_RESP) && ls->dentry_name) {
782 if (ls->dentry_flags & IS_DIR) {
783 if (ls->flags & PROCESS_DIRS) {
784 ls->userFunc(ls);
786 if (strcmp(ls->dentry_name, ls->path) &&
787 ls->flags & RECURSIVE) {
788 ls->rc = remote_ls(ls->repo,
789 ls->dentry_name,
790 ls->flags,
791 ls->userFunc,
792 ls->userData);
794 } else if (ls->flags & PROCESS_FILES) {
795 ls->userFunc(ls);
797 } else if (!strcmp(ctx->name, DAV_PROPFIND_NAME) && ctx->cdata) {
798 ls->dentry_name = xmalloc(strlen(ctx->cdata) -
799 ls->repo->path_len + 1);
800 strcpy(ls->dentry_name, ctx->cdata + ls->repo->path_len);
801 } else if (!strcmp(ctx->name, DAV_PROPFIND_COLLECTION)) {
802 ls->dentry_flags |= IS_DIR;
804 } else if (!strcmp(ctx->name, DAV_PROPFIND_RESP)) {
805 free(ls->dentry_name);
806 ls->dentry_name = NULL;
807 ls->dentry_flags = 0;
811 static int remote_ls(struct alt_base *repo, const char *path, int flags,
812 void (*userFunc)(struct remote_ls_ctx *ls),
813 void *userData)
815 char *url = xmalloc(strlen(repo->base) + strlen(path) + 1);
816 struct active_request_slot *slot;
817 struct slot_results results;
818 struct buffer in_buffer;
819 struct buffer out_buffer;
820 char *in_data;
821 char *out_data;
822 XML_Parser parser = XML_ParserCreate(NULL);
823 enum XML_Status result;
824 struct curl_slist *dav_headers = NULL;
825 struct xml_ctx ctx;
826 struct remote_ls_ctx ls;
828 ls.flags = flags;
829 ls.repo = repo;
830 ls.path = xstrdup(path);
831 ls.dentry_name = NULL;
832 ls.dentry_flags = 0;
833 ls.userData = userData;
834 ls.userFunc = userFunc;
835 ls.rc = 0;
837 sprintf(url, "%s%s", repo->base, path);
839 out_buffer.size = strlen(PROPFIND_ALL_REQUEST);
840 out_data = xmalloc(out_buffer.size + 1);
841 snprintf(out_data, out_buffer.size + 1, PROPFIND_ALL_REQUEST);
842 out_buffer.posn = 0;
843 out_buffer.buffer = out_data;
845 in_buffer.size = 4096;
846 in_data = xmalloc(in_buffer.size);
847 in_buffer.posn = 0;
848 in_buffer.buffer = in_data;
850 dav_headers = curl_slist_append(dav_headers, "Depth: 1");
851 dav_headers = curl_slist_append(dav_headers, "Content-Type: text/xml");
853 slot = get_active_slot();
854 slot->results = &results;
855 curl_easy_setopt(slot->curl, CURLOPT_INFILE, &out_buffer);
856 curl_easy_setopt(slot->curl, CURLOPT_INFILESIZE, out_buffer.size);
857 curl_easy_setopt(slot->curl, CURLOPT_READFUNCTION, fread_buffer);
858 curl_easy_setopt(slot->curl, CURLOPT_FILE, &in_buffer);
859 curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
860 curl_easy_setopt(slot->curl, CURLOPT_URL, url);
861 curl_easy_setopt(slot->curl, CURLOPT_UPLOAD, 1);
862 curl_easy_setopt(slot->curl, CURLOPT_CUSTOMREQUEST, DAV_PROPFIND);
863 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, dav_headers);
865 if (start_active_slot(slot)) {
866 run_active_slot(slot);
867 if (results.curl_result == CURLE_OK) {
868 ctx.name = xcalloc(10, 1);
869 ctx.len = 0;
870 ctx.cdata = NULL;
871 ctx.userFunc = handle_remote_ls_ctx;
872 ctx.userData = &ls;
873 XML_SetUserData(parser, &ctx);
874 XML_SetElementHandler(parser, xml_start_tag,
875 xml_end_tag);
876 XML_SetCharacterDataHandler(parser, xml_cdata);
877 result = XML_Parse(parser, in_buffer.buffer,
878 in_buffer.posn, 1);
879 free(ctx.name);
881 if (result != XML_STATUS_OK) {
882 ls.rc = error("XML error: %s",
883 XML_ErrorString(
884 XML_GetErrorCode(parser)));
886 } else {
887 ls.rc = -1;
889 } else {
890 ls.rc = error("Unable to start PROPFIND request");
893 free(ls.path);
894 free(url);
895 free(out_data);
896 free(in_buffer.buffer);
897 curl_slist_free_all(dav_headers);
899 return ls.rc;
902 static void process_ls_pack(struct remote_ls_ctx *ls)
904 unsigned char sha1[20];
906 if (strlen(ls->dentry_name) == 63 &&
907 !strncmp(ls->dentry_name, "objects/pack/pack-", 18) &&
908 has_extension(ls->dentry_name, ".pack")) {
909 get_sha1_hex(ls->dentry_name + 18, sha1);
910 setup_index(ls->repo, sha1);
913 #endif
915 static int fetch_indices(struct alt_base *repo)
917 unsigned char sha1[20];
918 char *url;
919 struct buffer buffer;
920 char *data;
921 int i = 0;
923 struct active_request_slot *slot;
924 struct slot_results results;
926 if (repo->got_indices)
927 return 0;
929 data = xmalloc(4096);
930 buffer.size = 4096;
931 buffer.posn = 0;
932 buffer.buffer = data;
934 if (get_verbosely)
935 fprintf(stderr, "Getting pack list for %s\n", repo->base);
937 #ifndef NO_EXPAT
938 if (remote_ls(repo, "objects/pack/", PROCESS_FILES,
939 process_ls_pack, NULL) == 0)
940 return 0;
941 #endif
943 url = xmalloc(strlen(repo->base) + 21);
944 sprintf(url, "%s/objects/info/packs", repo->base);
946 slot = get_active_slot();
947 slot->results = &results;
948 curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
949 curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
950 curl_easy_setopt(slot->curl, CURLOPT_URL, url);
951 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, NULL);
952 if (start_active_slot(slot)) {
953 run_active_slot(slot);
954 if (results.curl_result != CURLE_OK) {
955 if (missing_target(&results)) {
956 repo->got_indices = 1;
957 free(buffer.buffer);
958 return 0;
959 } else {
960 repo->got_indices = 0;
961 free(buffer.buffer);
962 return error("%s", curl_errorstr);
965 } else {
966 repo->got_indices = 0;
967 free(buffer.buffer);
968 return error("Unable to start request");
971 data = buffer.buffer;
972 while (i < buffer.posn) {
973 switch (data[i]) {
974 case 'P':
975 i++;
976 if (i + 52 <= buffer.posn &&
977 !strncmp(data + i, " pack-", 6) &&
978 !strncmp(data + i + 46, ".pack\n", 6)) {
979 get_sha1_hex(data + i + 6, sha1);
980 setup_index(repo, sha1);
981 i += 51;
982 break;
984 default:
985 while (i < buffer.posn && data[i] != '\n')
986 i++;
988 i++;
991 free(buffer.buffer);
992 repo->got_indices = 1;
993 return 0;
996 static int fetch_pack(struct alt_base *repo, unsigned char *sha1)
998 char *url;
999 struct packed_git *target;
1000 struct packed_git **lst;
1001 FILE *packfile;
1002 char *filename;
1003 char tmpfile[PATH_MAX];
1004 int ret;
1005 long prev_posn = 0;
1006 char range[RANGE_HEADER_SIZE];
1007 struct curl_slist *range_header = NULL;
1009 struct active_request_slot *slot;
1010 struct slot_results results;
1012 if (fetch_indices(repo))
1013 return -1;
1014 target = find_sha1_pack(sha1, repo->packs);
1015 if (!target)
1016 return -1;
1018 if (get_verbosely) {
1019 fprintf(stderr, "Getting pack %s\n",
1020 sha1_to_hex(target->sha1));
1021 fprintf(stderr, " which contains %s\n",
1022 sha1_to_hex(sha1));
1025 url = xmalloc(strlen(repo->base) + 65);
1026 sprintf(url, "%s/objects/pack/pack-%s.pack",
1027 repo->base, sha1_to_hex(target->sha1));
1029 filename = sha1_pack_name(target->sha1);
1030 snprintf(tmpfile, sizeof(tmpfile), "%s.temp", filename);
1031 packfile = fopen(tmpfile, "a");
1032 if (!packfile)
1033 return error("Unable to open local file %s for pack",
1034 filename);
1036 slot = get_active_slot();
1037 slot->results = &results;
1038 curl_easy_setopt(slot->curl, CURLOPT_FILE, packfile);
1039 curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite);
1040 curl_easy_setopt(slot->curl, CURLOPT_URL, url);
1041 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, no_pragma_header);
1042 slot->local = packfile;
1044 /* If there is data present from a previous transfer attempt,
1045 resume where it left off */
1046 prev_posn = ftell(packfile);
1047 if (prev_posn>0) {
1048 if (get_verbosely)
1049 fprintf(stderr,
1050 "Resuming fetch of pack %s at byte %ld\n",
1051 sha1_to_hex(target->sha1), prev_posn);
1052 sprintf(range, "Range: bytes=%ld-", prev_posn);
1053 range_header = curl_slist_append(range_header, range);
1054 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, range_header);
1057 if (start_active_slot(slot)) {
1058 run_active_slot(slot);
1059 if (results.curl_result != CURLE_OK) {
1060 fclose(packfile);
1061 return error("Unable to get pack file %s\n%s", url,
1062 curl_errorstr);
1064 } else {
1065 fclose(packfile);
1066 return error("Unable to start request");
1069 fclose(packfile);
1071 ret = move_temp_to_file(tmpfile, filename);
1072 if (ret)
1073 return ret;
1075 lst = &repo->packs;
1076 while (*lst != target)
1077 lst = &((*lst)->next);
1078 *lst = (*lst)->next;
1080 if (verify_pack(target, 0))
1081 return -1;
1082 install_packed_git(target);
1084 return 0;
1087 static void abort_object_request(struct object_request *obj_req)
1089 if (obj_req->local >= 0) {
1090 close(obj_req->local);
1091 obj_req->local = -1;
1093 unlink(obj_req->tmpfile);
1094 if (obj_req->slot) {
1095 release_active_slot(obj_req->slot);
1096 obj_req->slot = NULL;
1098 release_object_request(obj_req);
1101 static int fetch_object(struct alt_base *repo, unsigned char *sha1)
1103 char *hex = sha1_to_hex(sha1);
1104 int ret = 0;
1105 struct object_request *obj_req = object_queue_head;
1107 while (obj_req != NULL && hashcmp(obj_req->sha1, sha1))
1108 obj_req = obj_req->next;
1109 if (obj_req == NULL)
1110 return error("Couldn't find request for %s in the queue", hex);
1112 if (has_sha1_file(obj_req->sha1)) {
1113 abort_object_request(obj_req);
1114 return 0;
1117 #ifdef USE_CURL_MULTI
1118 while (obj_req->state == WAITING) {
1119 step_active_slots();
1121 #else
1122 start_object_request(obj_req);
1123 #endif
1125 while (obj_req->state == ACTIVE) {
1126 run_active_slot(obj_req->slot);
1128 if (obj_req->local != -1) {
1129 close(obj_req->local); obj_req->local = -1;
1132 if (obj_req->state == ABORTED) {
1133 ret = error("Request for %s aborted", hex);
1134 } else if (obj_req->curl_result != CURLE_OK &&
1135 obj_req->http_code != 416) {
1136 if (missing_target(obj_req))
1137 ret = -1; /* Be silent, it is probably in a pack. */
1138 else
1139 ret = error("%s (curl_result = %d, http_code = %ld, sha1 = %s)",
1140 obj_req->errorstr, obj_req->curl_result,
1141 obj_req->http_code, hex);
1142 } else if (obj_req->zret != Z_STREAM_END) {
1143 corrupt_object_found++;
1144 ret = error("File %s (%s) corrupt", hex, obj_req->url);
1145 } else if (hashcmp(obj_req->sha1, obj_req->real_sha1)) {
1146 ret = error("File %s has bad hash", hex);
1147 } else if (obj_req->rename < 0) {
1148 ret = error("unable to write sha1 filename %s",
1149 obj_req->filename);
1152 release_object_request(obj_req);
1153 return ret;
1156 int fetch(unsigned char *sha1)
1158 struct alt_base *altbase = alt;
1160 if (!fetch_object(altbase, sha1))
1161 return 0;
1162 while (altbase) {
1163 if (!fetch_pack(altbase, sha1))
1164 return 0;
1165 fetch_alternates(alt->base);
1166 altbase = altbase->next;
1168 return error("Unable to find %s under %s", sha1_to_hex(sha1),
1169 alt->base);
/*
 * Return 0 for characters that may appear verbatim in a ref URL (ASCII
 * letters, digits, '/', '-' and '.') and 1 for everything else.
 */
static inline int needs_quote(int ch)
{
	if (((ch >= 'A') && (ch <= 'Z'))
			|| ((ch >= 'a') && (ch <= 'z'))
			|| ((ch >= '0') && (ch <= '9'))
			|| (ch == '/')
			|| (ch == '-')
			|| (ch == '.'))
		return 0;
	return 1;
}
/* Map a nibble value (0..15) to its upper-case hexadecimal digit. */
static inline int hex(int v)
{
	if (v < 10) return '0' + v;
	else return 'A' + v - 10;
}
/*
 * Build "<base>refs/<ref>" with every character of `ref` for which
 * needs_quote() is true replaced by its %XX escape.  The result is
 * allocated with xmalloc(); the caller owns it.
 */
static char *quote_ref_url(const char *base, const char *ref)
{
	const char *cp;
	char *dp, *qref;
	int len, baselen, ch;

	baselen = strlen(base);
	len = baselen + 6; /* "refs/" + NUL */
	for (cp = ref; (ch = *cp) != 0; cp++, len++)
		if (needs_quote(ch))
			len += 2; /* extra two hex plus replacement % */
	qref = xmalloc(len);
	memcpy(qref, base, baselen);
	memcpy(qref + baselen, "refs/", 5);
	for (cp = ref, dp = qref + baselen + 5; (ch = *cp) != 0; cp++) {
		if (needs_quote(ch)) {
			*dp++ = '%';
			*dp++ = hex((ch >> 4) & 0xF);
			*dp++ = hex(ch & 0xF);
		}
		else
			*dp++ = ch;
	}
	*dp = 0;

	return qref;
}
1218 int fetch_ref(char *ref, unsigned char *sha1)
1220 char *url;
1221 char hex[42];
1222 struct buffer buffer;
1223 const char *base = alt->base;
1224 struct active_request_slot *slot;
1225 struct slot_results results;
1226 buffer.size = 41;
1227 buffer.posn = 0;
1228 buffer.buffer = hex;
1229 hex[41] = '\0';
1231 url = quote_ref_url(base, ref);
1232 slot = get_active_slot();
1233 slot->results = &results;
1234 curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
1235 curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
1236 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, NULL);
1237 curl_easy_setopt(slot->curl, CURLOPT_URL, url);
1238 if (start_active_slot(slot)) {
1239 run_active_slot(slot);
1240 if (results.curl_result != CURLE_OK)
1241 return error("Couldn't get %s for %s\n%s",
1242 url, ref, curl_errorstr);
1243 } else {
1244 return error("Unable to start request");
1247 hex[40] = '\0';
1248 get_sha1_hex(hex, sha1);
1249 return 0;
1252 int main(int argc, const char **argv)
1254 int commits;
1255 const char **write_ref = NULL;
1256 char **commit_id;
1257 const char *url;
1258 char *path;
1259 int arg = 1;
1260 int rc = 0;
1262 setup_ident();
1263 setup_git_directory();
1264 git_config(git_default_config);
1266 while (arg < argc && argv[arg][0] == '-') {
1267 if (argv[arg][1] == 't') {
1268 get_tree = 1;
1269 } else if (argv[arg][1] == 'c') {
1270 get_history = 1;
1271 } else if (argv[arg][1] == 'a') {
1272 get_all = 1;
1273 get_tree = 1;
1274 get_history = 1;
1275 } else if (argv[arg][1] == 'v') {
1276 get_verbosely = 1;
1277 } else if (argv[arg][1] == 'w') {
1278 write_ref = &argv[arg + 1];
1279 arg++;
1280 } else if (!strcmp(argv[arg], "--recover")) {
1281 get_recover = 1;
1282 } else if (!strcmp(argv[arg], "--stdin")) {
1283 commits_on_stdin = 1;
1285 arg++;
1287 if (argc < arg + 2 - commits_on_stdin) {
1288 usage("git-http-fetch [-c] [-t] [-a] [-v] [--recover] [-w ref] [--stdin] commit-id url");
1289 return 1;
1291 if (commits_on_stdin) {
1292 commits = pull_targets_stdin(&commit_id, &write_ref);
1293 } else {
1294 commit_id = (char **) &argv[arg++];
1295 commits = 1;
1297 url = argv[arg];
1299 http_init();
1301 no_pragma_header = curl_slist_append(no_pragma_header, "Pragma:");
1303 alt = xmalloc(sizeof(*alt));
1304 alt->base = url;
1305 alt->got_indices = 0;
1306 alt->packs = NULL;
1307 alt->next = NULL;
1308 path = strstr(url, "//");
1309 if (path) {
1310 path = strchr(path+2, '/');
1311 if (path)
1312 alt->path_len = strlen(path);
1315 if (pull(commits, commit_id, write_ref, url))
1316 rc = 1;
1318 http_cleanup();
1320 curl_slist_free_all(no_pragma_header);
1322 if (commits_on_stdin)
1323 pull_targets_free(commits, commit_id, write_ref);
1325 if (corrupt_object_found) {
1326 fprintf(stderr,
1327 "Some loose object were found to be corrupt, but they might be just\n"
1328 "a false '404 Not Found' error message sent with incorrect HTTP\n"
1329 "status code. Suggest running git fsck-objects.\n");
1331 return rc;