Merge branch 'kg/external-diff-save-env'
[git.git] / http-walker.c
blob0a392c85b67af254ddeca382faa79458c4ec3ca8
1 #include "cache.h"
2 #include "repository.h"
3 #include "commit.h"
4 #include "walker.h"
5 #include "http.h"
6 #include "list.h"
7 #include "transport.h"
8 #include "packfile.h"
9 #include "object-store.h"
/*
 * One object store we may fetch from.  Stores form a singly linked list
 * through "next"; alternates discovered later are appended at the tail.
 */
struct alt_base {
	char *base;			/* base URL of the store; freed in cleanup() */
	int got_indices;		/* set once the pack list request has been answered */
	struct packed_git *packs;	/* pack list filled in by http_get_info_packs() */
	struct alt_base *next;		/* next store to try, or NULL */
};
/* Life cycle of a queued object request. */
enum object_request_state {
	WAITING = 0,	/* queued by prefetch(), no transfer started yet */
	ABORTED,	/* the transfer could not be created or started */
	ACTIVE,		/* handed to a request slot, response pending */
	COMPLETE	/* response processed, or object already present */
};
25 struct object_request {
26 struct walker *walker;
27 struct object_id oid;
28 struct alt_base *repo;
29 enum object_request_state state;
30 struct http_object_request *req;
31 struct list_head node;
/*
 * State shared between fetch_alternates() and its slot callback,
 * process_alternates_response().
 */
struct alternates_request {
	struct walker *walker;			/* walker on whose behalf we fetch */
	const char *base;			/* base URL whose alternates are wanted */
	struct strbuf *url;			/* URL of the request in flight */
	struct strbuf *buffer;			/* receives the response body */
	struct active_request_slot *slot;	/* slot carrying the request */
	int http_specific;			/* 1 while asking for http-alternates */
};
/* Per-walker private state, stored in walker->data. */
struct walker_data {
	const char *url;
	/*
	 * -1: alternates not fetched (or the fetch failed),
	 *  0: a fetch is in progress,
	 *  1: alternates have been loaded.
	 */
	int got_alternates;
	struct alt_base *alt;	/* head of the store list; the primary store */
};
/* Every queued object request; prefetch() appends new entries at the tail. */
static LIST_HEAD(object_queue_head);

/* Defined further down; used by the object response callback. */
static void fetch_alternates(struct walker *walker, const char *base);

/* Slot callback installed by start_object_request(). */
static void process_object_response(void *callback_data);
55 static void start_object_request(struct walker *walker,
56 struct object_request *obj_req)
58 struct active_request_slot *slot;
59 struct http_object_request *req;
61 req = new_http_object_request(obj_req->repo->base, obj_req->oid.hash);
62 if (req == NULL) {
63 obj_req->state = ABORTED;
64 return;
66 obj_req->req = req;
68 slot = req->slot;
69 slot->callback_func = process_object_response;
70 slot->callback_data = obj_req;
72 /* Try to get the request started, abort the request on error */
73 obj_req->state = ACTIVE;
74 if (!start_active_slot(slot)) {
75 obj_req->state = ABORTED;
76 release_http_object_request(req);
77 return;
81 static void finish_object_request(struct object_request *obj_req)
83 if (finish_http_object_request(obj_req->req))
84 return;
86 if (obj_req->req->rename == 0)
87 walker_say(obj_req->walker, "got %s\n", oid_to_hex(&obj_req->oid));
90 static void process_object_response(void *callback_data)
92 struct object_request *obj_req =
93 (struct object_request *)callback_data;
94 struct walker *walker = obj_req->walker;
95 struct walker_data *data = walker->data;
96 struct alt_base *alt = data->alt;
98 process_http_object_request(obj_req->req);
99 obj_req->state = COMPLETE;
101 /* Use alternates if necessary */
102 if (missing_target(obj_req->req)) {
103 fetch_alternates(walker, alt->base);
104 if (obj_req->repo->next != NULL) {
105 obj_req->repo =
106 obj_req->repo->next;
107 release_http_object_request(obj_req->req);
108 start_object_request(walker, obj_req);
109 return;
113 finish_object_request(obj_req);
116 static void release_object_request(struct object_request *obj_req)
118 if (obj_req->req !=NULL && obj_req->req->localfile != -1)
119 error("fd leakage in release: %d", obj_req->req->localfile);
121 list_del(&obj_req->node);
122 free(obj_req);
#ifdef USE_CURL_MULTI
/*
 * Fill function registered in get_http_walker(): start the first
 * WAITING request in the queue whose object we do not already have.
 * WAITING requests for objects that are already present are marked
 * COMPLETE along the way.  Returns 1 if a request was started, else 0.
 */
static int fill_active_slot(struct walker *walker)
{
	struct list_head *cur, *next;

	list_for_each_safe(cur, next, &object_queue_head) {
		struct object_request *candidate =
			list_entry(cur, struct object_request, node);

		if (candidate->state != WAITING)
			continue;
		if (has_sha1_file(candidate->oid.hash)) {
			candidate->state = COMPLETE;
			continue;
		}
		start_object_request(walker, candidate);
		return 1;
	}
	return 0;
}
#endif
146 static void prefetch(struct walker *walker, unsigned char *sha1)
148 struct object_request *newreq;
149 struct walker_data *data = walker->data;
151 newreq = xmalloc(sizeof(*newreq));
152 newreq->walker = walker;
153 hashcpy(newreq->oid.hash, sha1);
154 newreq->repo = data->alt;
155 newreq->state = WAITING;
156 newreq->req = NULL;
158 http_is_verbose = walker->get_verbosely;
159 list_add_tail(&newreq->node, &object_queue_head);
161 #ifdef USE_CURL_MULTI
162 fill_active_slots();
163 step_active_slots();
164 #endif
167 static int is_alternate_allowed(const char *url)
169 const char *protocols[] = {
170 "http", "https", "ftp", "ftps"
172 int i;
174 if (http_follow_config != HTTP_FOLLOW_ALWAYS) {
175 warning("alternate disabled by http.followRedirects: %s", url);
176 return 0;
179 for (i = 0; i < ARRAY_SIZE(protocols); i++) {
180 const char *end;
181 if (skip_prefix(url, protocols[i], &end) &&
182 starts_with(end, "://"))
183 break;
186 if (i >= ARRAY_SIZE(protocols)) {
187 warning("ignoring alternate with unknown protocol: %s", url);
188 return 0;
190 if (!is_transport_allowed(protocols[i], 0)) {
191 warning("ignoring alternate with restricted protocol: %s", url);
192 return 0;
195 return 1;
198 static void process_alternates_response(void *callback_data)
200 struct alternates_request *alt_req =
201 (struct alternates_request *)callback_data;
202 struct walker *walker = alt_req->walker;
203 struct walker_data *cdata = walker->data;
204 struct active_request_slot *slot = alt_req->slot;
205 struct alt_base *tail = cdata->alt;
206 const char *base = alt_req->base;
207 const char null_byte = '\0';
208 char *data;
209 int i = 0;
211 if (alt_req->http_specific) {
212 if (slot->curl_result != CURLE_OK ||
213 !alt_req->buffer->len) {
215 /* Try reusing the slot to get non-http alternates */
216 alt_req->http_specific = 0;
217 strbuf_reset(alt_req->url);
218 strbuf_addf(alt_req->url, "%s/objects/info/alternates",
219 base);
220 curl_easy_setopt(slot->curl, CURLOPT_URL,
221 alt_req->url->buf);
222 active_requests++;
223 slot->in_use = 1;
224 if (slot->finished != NULL)
225 (*slot->finished) = 0;
226 if (!start_active_slot(slot)) {
227 cdata->got_alternates = -1;
228 slot->in_use = 0;
229 if (slot->finished != NULL)
230 (*slot->finished) = 1;
232 return;
234 } else if (slot->curl_result != CURLE_OK) {
235 if (!missing_target(slot)) {
236 cdata->got_alternates = -1;
237 return;
241 fwrite_buffer((char *)&null_byte, 1, 1, alt_req->buffer);
242 alt_req->buffer->len--;
243 data = alt_req->buffer->buf;
245 while (i < alt_req->buffer->len) {
246 int posn = i;
247 while (posn < alt_req->buffer->len && data[posn] != '\n')
248 posn++;
249 if (data[posn] == '\n') {
250 int okay = 0;
251 int serverlen = 0;
252 struct alt_base *newalt;
253 if (data[i] == '/') {
255 * This counts
256 * http://git.host/pub/scm/linux.git/
257 * -----------here^
258 * so memcpy(dst, base, serverlen) will
259 * copy up to "...git.host".
261 const char *colon_ss = strstr(base,"://");
262 if (colon_ss) {
263 serverlen = (strchr(colon_ss + 3, '/')
264 - base);
265 okay = 1;
267 } else if (!memcmp(data + i, "../", 3)) {
269 * Relative URL; chop the corresponding
270 * number of subpath from base (and ../
271 * from data), and concatenate the result.
273 * The code first drops ../ from data, and
274 * then drops one ../ from data and one path
275 * from base. IOW, one extra ../ is dropped
276 * from data than path is dropped from base.
278 * This is not wrong. The alternate in
279 * http://git.host/pub/scm/linux.git/
280 * to borrow from
281 * http://git.host/pub/scm/linus.git/
282 * is ../../linus.git/objects/. You need
283 * two ../../ to borrow from your direct
284 * neighbour.
286 i += 3;
287 serverlen = strlen(base);
288 while (i + 2 < posn &&
289 !memcmp(data + i, "../", 3)) {
290 do {
291 serverlen--;
292 } while (serverlen &&
293 base[serverlen - 1] != '/');
294 i += 3;
296 /* If the server got removed, give up. */
297 okay = strchr(base, ':') - base + 3 <
298 serverlen;
299 } else if (alt_req->http_specific) {
300 char *colon = strchr(data + i, ':');
301 char *slash = strchr(data + i, '/');
302 if (colon && slash && colon < data + posn &&
303 slash < data + posn && colon < slash) {
304 okay = 1;
307 if (okay) {
308 struct strbuf target = STRBUF_INIT;
309 strbuf_add(&target, base, serverlen);
310 strbuf_add(&target, data + i, posn - i);
311 if (!strbuf_strip_suffix(&target, "objects")) {
312 warning("ignoring alternate that does"
313 " not end in 'objects': %s",
314 target.buf);
315 strbuf_release(&target);
316 } else if (is_alternate_allowed(target.buf)) {
317 warning("adding alternate object store: %s",
318 target.buf);
319 newalt = xmalloc(sizeof(*newalt));
320 newalt->next = NULL;
321 newalt->base = strbuf_detach(&target, NULL);
322 newalt->got_indices = 0;
323 newalt->packs = NULL;
325 while (tail->next != NULL)
326 tail = tail->next;
327 tail->next = newalt;
328 } else {
329 strbuf_release(&target);
333 i = posn + 1;
336 cdata->got_alternates = 1;
339 static void fetch_alternates(struct walker *walker, const char *base)
341 struct strbuf buffer = STRBUF_INIT;
342 struct strbuf url = STRBUF_INIT;
343 struct active_request_slot *slot;
344 struct alternates_request alt_req;
345 struct walker_data *cdata = walker->data;
348 * If another request has already started fetching alternates,
349 * wait for them to arrive and return to processing this request's
350 * curl message
352 #ifdef USE_CURL_MULTI
353 while (cdata->got_alternates == 0) {
354 step_active_slots();
356 #endif
358 /* Nothing to do if they've already been fetched */
359 if (cdata->got_alternates == 1)
360 return;
362 /* Start the fetch */
363 cdata->got_alternates = 0;
365 if (walker->get_verbosely)
366 fprintf(stderr, "Getting alternates list for %s\n", base);
368 strbuf_addf(&url, "%s/objects/info/http-alternates", base);
371 * Use a callback to process the result, since another request
372 * may fail and need to have alternates loaded before continuing
374 slot = get_active_slot();
375 slot->callback_func = process_alternates_response;
376 alt_req.walker = walker;
377 slot->callback_data = &alt_req;
379 curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
380 curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
381 curl_easy_setopt(slot->curl, CURLOPT_URL, url.buf);
383 alt_req.base = base;
384 alt_req.url = &url;
385 alt_req.buffer = &buffer;
386 alt_req.http_specific = 1;
387 alt_req.slot = slot;
389 if (start_active_slot(slot))
390 run_active_slot(slot);
391 else
392 cdata->got_alternates = -1;
394 strbuf_release(&buffer);
395 strbuf_release(&url);
398 static int fetch_indices(struct walker *walker, struct alt_base *repo)
400 int ret;
402 if (repo->got_indices)
403 return 0;
405 if (walker->get_verbosely)
406 fprintf(stderr, "Getting pack list for %s\n", repo->base);
408 switch (http_get_info_packs(repo->base, &repo->packs)) {
409 case HTTP_OK:
410 case HTTP_MISSING_TARGET:
411 repo->got_indices = 1;
412 ret = 0;
413 break;
414 default:
415 repo->got_indices = 0;
416 ret = -1;
419 return ret;
422 static int http_fetch_pack(struct walker *walker, struct alt_base *repo, unsigned char *sha1)
424 struct packed_git *target;
425 int ret;
426 struct slot_results results;
427 struct http_pack_request *preq;
429 if (fetch_indices(walker, repo))
430 return -1;
431 target = find_sha1_pack(sha1, repo->packs);
432 if (!target)
433 return -1;
435 if (walker->get_verbosely) {
436 fprintf(stderr, "Getting pack %s\n",
437 sha1_to_hex(target->sha1));
438 fprintf(stderr, " which contains %s\n",
439 sha1_to_hex(sha1));
442 preq = new_http_pack_request(target, repo->base);
443 if (preq == NULL)
444 goto abort;
445 preq->lst = &repo->packs;
446 preq->slot->results = &results;
448 if (start_active_slot(preq->slot)) {
449 run_active_slot(preq->slot);
450 if (results.curl_result != CURLE_OK) {
451 error("Unable to get pack file %s\n%s", preq->url,
452 curl_errorstr);
453 goto abort;
455 } else {
456 error("Unable to start request");
457 goto abort;
460 ret = finish_http_pack_request(preq);
461 release_http_pack_request(preq);
462 if (ret)
463 return ret;
465 return 0;
467 abort:
468 return -1;
/* Drop a request that is no longer needed; same as releasing it. */
static void abort_object_request(struct object_request *obj_req)
{
	release_object_request(obj_req);
}
476 static int fetch_object(struct walker *walker, unsigned char *sha1)
478 char *hex = sha1_to_hex(sha1);
479 int ret = 0;
480 struct object_request *obj_req = NULL;
481 struct http_object_request *req;
482 struct list_head *pos, *head = &object_queue_head;
484 list_for_each(pos, head) {
485 obj_req = list_entry(pos, struct object_request, node);
486 if (hasheq(obj_req->oid.hash, sha1))
487 break;
489 if (obj_req == NULL)
490 return error("Couldn't find request for %s in the queue", hex);
492 if (has_sha1_file(obj_req->oid.hash)) {
493 if (obj_req->req != NULL)
494 abort_http_object_request(obj_req->req);
495 abort_object_request(obj_req);
496 return 0;
499 #ifdef USE_CURL_MULTI
500 while (obj_req->state == WAITING)
501 step_active_slots();
502 #else
503 start_object_request(walker, obj_req);
504 #endif
507 * obj_req->req might change when fetching alternates in the callback
508 * process_object_response; therefore, the "shortcut" variable, req,
509 * is used only after we're done with slots.
511 while (obj_req->state == ACTIVE)
512 run_active_slot(obj_req->req->slot);
514 req = obj_req->req;
516 if (req->localfile != -1) {
517 close(req->localfile);
518 req->localfile = -1;
522 * we turned off CURLOPT_FAILONERROR to avoid losing a
523 * persistent connection and got CURLE_OK.
525 if (req->http_code >= 300 && req->curl_result == CURLE_OK &&
526 (starts_with(req->url, "http://") ||
527 starts_with(req->url, "https://"))) {
528 req->curl_result = CURLE_HTTP_RETURNED_ERROR;
529 xsnprintf(req->errorstr, sizeof(req->errorstr),
530 "HTTP request failed");
533 if (obj_req->state == ABORTED) {
534 ret = error("Request for %s aborted", hex);
535 } else if (req->curl_result != CURLE_OK &&
536 req->http_code != 416) {
537 if (missing_target(req))
538 ret = -1; /* Be silent, it is probably in a pack. */
539 else
540 ret = error("%s (curl_result = %d, http_code = %ld, sha1 = %s)",
541 req->errorstr, req->curl_result,
542 req->http_code, hex);
543 } else if (req->zret != Z_STREAM_END) {
544 walker->corrupt_object_found++;
545 ret = error("File %s (%s) corrupt", hex, req->url);
546 } else if (!hasheq(obj_req->oid.hash, req->real_sha1)) {
547 ret = error("File %s has bad hash", hex);
548 } else if (req->rename < 0) {
549 struct strbuf buf = STRBUF_INIT;
550 loose_object_path(the_repository, &buf, req->sha1);
551 ret = error("unable to write sha1 filename %s", buf.buf);
552 strbuf_release(&buf);
555 release_http_object_request(req);
556 release_object_request(obj_req);
557 return ret;
560 static int fetch(struct walker *walker, unsigned char *sha1)
562 struct walker_data *data = walker->data;
563 struct alt_base *altbase = data->alt;
565 if (!fetch_object(walker, sha1))
566 return 0;
567 while (altbase) {
568 if (!http_fetch_pack(walker, altbase, sha1))
569 return 0;
570 fetch_alternates(walker, data->alt->base);
571 altbase = altbase->next;
573 return error("Unable to find %s under %s", sha1_to_hex(sha1),
574 data->alt->base);
577 static int fetch_ref(struct walker *walker, struct ref *ref)
579 struct walker_data *data = walker->data;
580 return http_fetch_ref(data->alt->base, ref);
583 static void cleanup(struct walker *walker)
585 struct walker_data *data = walker->data;
586 struct alt_base *alt, *alt_next;
588 if (data) {
589 alt = data->alt;
590 while (alt) {
591 alt_next = alt->next;
593 free(alt->base);
594 free(alt);
596 alt = alt_next;
598 free(data);
599 walker->data = NULL;
603 struct walker *get_http_walker(const char *url)
605 char *s;
606 struct walker_data *data = xmalloc(sizeof(struct walker_data));
607 struct walker *walker = xmalloc(sizeof(struct walker));
609 data->alt = xmalloc(sizeof(*data->alt));
610 data->alt->base = xstrdup(url);
611 for (s = data->alt->base + strlen(data->alt->base) - 1; *s == '/'; --s)
612 *s = 0;
614 data->alt->got_indices = 0;
615 data->alt->packs = NULL;
616 data->alt->next = NULL;
617 data->got_alternates = -1;
619 walker->corrupt_object_found = 0;
620 walker->fetch = fetch;
621 walker->fetch_ref = fetch_ref;
622 walker->prefetch = prefetch;
623 walker->cleanup = cleanup;
624 walker->data = data;
626 #ifdef USE_CURL_MULTI
627 add_fill_function(walker, (int (*)(void *)) fill_active_slot);
628 #endif
630 return walker;