fast-export: do not copy from modified file
[git.git] / builtin / fast-export.c
blob6a85c25ca7806f1ec064af4a70fd675b34ff969f
1 /*
2 * "git fast-export" builtin command
4 * Copyright (C) 2007 Johannes E. Schindelin
5 */
6 #include "builtin.h"
7 #include "cache.h"
8 #include "refs.h"
9 #include "commit.h"
10 #include "object.h"
11 #include "tag.h"
12 #include "diff.h"
13 #include "diffcore.h"
14 #include "log-tree.h"
15 #include "revision.h"
16 #include "decorate.h"
17 #include "string-list.h"
18 #include "utf8.h"
19 #include "parse-options.h"
20 #include "quote.h"
21 #include "remote.h"
22 #include "blob.h"
24 static const char *fast_export_usage[] = {
25 N_("git fast-export [rev-list-opts]"),
26 NULL
29 static int progress;
30 static enum { ABORT, VERBATIM, WARN, WARN_STRIP, STRIP } signed_tag_mode = ABORT;
31 static enum { ERROR, DROP, REWRITE } tag_of_filtered_mode = ERROR;
32 static int fake_missing_tagger;
33 static int use_done_feature;
34 static int no_data;
35 static int full_tree;
36 static struct string_list extra_refs = STRING_LIST_INIT_NODUP;
37 static struct refspec *refspecs;
38 static int refspecs_nr;
39 static int anonymize;
41 static int parse_opt_signed_tag_mode(const struct option *opt,
42 const char *arg, int unset)
44 if (unset || !strcmp(arg, "abort"))
45 signed_tag_mode = ABORT;
46 else if (!strcmp(arg, "verbatim") || !strcmp(arg, "ignore"))
47 signed_tag_mode = VERBATIM;
48 else if (!strcmp(arg, "warn"))
49 signed_tag_mode = WARN;
50 else if (!strcmp(arg, "warn-strip"))
51 signed_tag_mode = WARN_STRIP;
52 else if (!strcmp(arg, "strip"))
53 signed_tag_mode = STRIP;
54 else
55 return error("Unknown signed-tags mode: %s", arg);
56 return 0;
59 static int parse_opt_tag_of_filtered_mode(const struct option *opt,
60 const char *arg, int unset)
62 if (unset || !strcmp(arg, "abort"))
63 tag_of_filtered_mode = ERROR;
64 else if (!strcmp(arg, "drop"))
65 tag_of_filtered_mode = DROP;
66 else if (!strcmp(arg, "rewrite"))
67 tag_of_filtered_mode = REWRITE;
68 else
69 return error("Unknown tag-of-filtered mode: %s", arg);
70 return 0;
73 static struct decoration idnums;
74 static uint32_t last_idnum;
76 static int has_unshown_parent(struct commit *commit)
78 struct commit_list *parent;
80 for (parent = commit->parents; parent; parent = parent->next)
81 if (!(parent->item->object.flags & SHOWN) &&
82 !(parent->item->object.flags & UNINTERESTING))
83 return 1;
84 return 0;
87 struct anonymized_entry {
88 struct hashmap_entry hash;
89 const char *orig;
90 size_t orig_len;
91 const char *anon;
92 size_t anon_len;
95 static int anonymized_entry_cmp(const void *va, const void *vb,
96 const void *data)
98 const struct anonymized_entry *a = va, *b = vb;
99 return a->orig_len != b->orig_len ||
100 memcmp(a->orig, b->orig, a->orig_len);
104 * Basically keep a cache of X->Y so that we can repeatedly replace
105 * the same anonymized string with another. The actual generation
106 * is farmed out to the generate function.
108 static const void *anonymize_mem(struct hashmap *map,
109 void *(*generate)(const void *, size_t *),
110 const void *orig, size_t *len)
112 struct anonymized_entry key, *ret;
114 if (!map->cmpfn)
115 hashmap_init(map, anonymized_entry_cmp, 0);
117 hashmap_entry_init(&key, memhash(orig, *len));
118 key.orig = orig;
119 key.orig_len = *len;
120 ret = hashmap_get(map, &key, NULL);
122 if (!ret) {
123 ret = xmalloc(sizeof(*ret));
124 hashmap_entry_init(&ret->hash, key.hash.hash);
125 ret->orig = xstrdup(orig);
126 ret->orig_len = *len;
127 ret->anon = generate(orig, len);
128 ret->anon_len = *len;
129 hashmap_put(map, ret);
132 *len = ret->anon_len;
133 return ret->anon;
/*
 * Append an anonymized copy of "path" to "out".
 *
 * Each '/'-separated component is anonymized on its own, so that paths
 * a/b and a/c still share a common root.  Components are looked up
 * through anonymize_mem(), so repeated lookups of "a" give the same
 * replacement.  The '/' separators themselves are copied unchanged.
 */
static void anonymize_path(struct strbuf *out, const char *path,
			   struct hashmap *map,
			   void *(*generate)(const void *, size_t *))
{
	while (*path) {
		const char *stop = strchrnul(path, '/');
		size_t component_len = stop - path;
		const char *replacement;

		replacement = anonymize_mem(map, generate, path, &component_len);
		strbuf_add(out, replacement, component_len);

		path = stop;
		if (*path) {
			/* strchrnul() stopped on a '/', keep it and move on. */
			strbuf_addch(out, *path);
			path++;
		}
	}
}

/*
 * Encode a mark number as a pointer value so it can be stored as a
 * decoration.  Since intptr_t is C99, we do not use it here.
 */
static inline uint32_t *mark_to_ptr(uint32_t mark)
{
	uint32_t *origin = (uint32_t *)NULL;

	return &origin[mark];
}

/* Decode a pointer value produced by mark_to_ptr() back into a mark number. */
static inline uint32_t ptr_to_mark(void * mark)
{
	uint32_t *origin = (uint32_t *)NULL;
	uint32_t *encoded = (uint32_t *)mark;

	return encoded - origin;
}

168 static inline void mark_object(struct object *object, uint32_t mark)
170 add_decoration(&idnums, object, mark_to_ptr(mark));
173 static inline void mark_next_object(struct object *object)
175 mark_object(object, ++last_idnum);
178 static int get_object_mark(struct object *object)
180 void *decoration = lookup_decoration(&idnums, object);
181 if (!decoration)
182 return 0;
183 return ptr_to_mark(decoration);
186 static void show_progress(void)
188 static int counter = 0;
189 if (!progress)
190 return;
191 if ((++counter % progress) == 0)
192 printf("progress %d objects\n", counter);
196 * Ideally we would want some transformation of the blob data here
197 * that is unreversible, but would still be the same size and have
198 * the same data relationship to other blobs (so that we get the same
199 * delta and packing behavior as the original). But the first and last
200 * requirements there are probably mutually exclusive, so let's take
201 * the easy way out for now, and just generate arbitrary content.
203 * There's no need to cache this result with anonymize_mem, since
204 * we already handle blob content caching with marks.
206 static char *anonymize_blob(unsigned long *size)
208 static int counter;
209 struct strbuf out = STRBUF_INIT;
210 strbuf_addf(&out, "anonymous blob %d", counter++);
211 *size = out.len;
212 return strbuf_detach(&out, NULL);
215 static void export_blob(const unsigned char *sha1)
217 unsigned long size;
218 enum object_type type;
219 char *buf;
220 struct object *object;
221 int eaten;
223 if (no_data)
224 return;
226 if (is_null_sha1(sha1))
227 return;
229 object = lookup_object(sha1);
230 if (object && object->flags & SHOWN)
231 return;
233 if (anonymize) {
234 buf = anonymize_blob(&size);
235 object = (struct object *)lookup_blob(sha1);
236 eaten = 0;
237 } else {
238 buf = read_sha1_file(sha1, &type, &size);
239 if (!buf)
240 die ("Could not read blob %s", sha1_to_hex(sha1));
241 if (check_sha1_signature(sha1, buf, size, typename(type)) < 0)
242 die("sha1 mismatch in blob %s", sha1_to_hex(sha1));
243 object = parse_object_buffer(sha1, type, size, buf, &eaten);
246 if (!object)
247 die("Could not read blob %s", sha1_to_hex(sha1));
249 mark_next_object(object);
251 printf("blob\nmark :%"PRIu32"\ndata %lu\n", last_idnum, size);
252 if (size && fwrite(buf, size, 1, stdout) != 1)
253 die_errno ("Could not write blob '%s'", sha1_to_hex(sha1));
254 printf("\n");
256 show_progress();
258 object->flags |= SHOWN;
259 if (!eaten)
260 free(buf);
263 static int depth_first(const void *a_, const void *b_)
265 const struct diff_filepair *a = *((const struct diff_filepair **)a_);
266 const struct diff_filepair *b = *((const struct diff_filepair **)b_);
267 const char *name_a, *name_b;
268 int len_a, len_b, len;
269 int cmp;
271 name_a = a->one ? a->one->path : a->two->path;
272 name_b = b->one ? b->one->path : b->two->path;
274 len_a = strlen(name_a);
275 len_b = strlen(name_b);
276 len = (len_a < len_b) ? len_a : len_b;
278 /* strcmp will sort 'd' before 'd/e', we want 'd/e' before 'd' */
279 cmp = memcmp(name_a, name_b, len);
280 if (cmp)
281 return cmp;
282 cmp = len_b - len_a;
283 if (cmp)
284 return cmp;
286 * Move 'R'ename entries last so that all references of the file
287 * appear in the output before it is renamed (e.g., when a file
288 * was copied and renamed in the same commit).
290 return (a->status == 'R') - (b->status == 'R');
/*
 * Print "path" to stdout, quoted when necessary: C-style quoting when
 * quote_c_style() says it is needed, plain double quotes when the path
 * merely contains a space, and verbatim otherwise.
 */
static void print_path_1(const char *path)
{
	if (quote_c_style(path, NULL, NULL, 0)) {
		quote_c_style(path, NULL, stdout, 0);
		return;
	}
	if (strchr(path, ' ')) {
		printf("\"%s\"", path);
		return;
	}
	printf("%s", path);
}

304 static void *anonymize_path_component(const void *path, size_t *len)
306 static int counter;
307 struct strbuf out = STRBUF_INIT;
308 strbuf_addf(&out, "path%d", counter++);
309 return strbuf_detach(&out, len);
312 static void print_path(const char *path)
314 if (!anonymize)
315 print_path_1(path);
316 else {
317 static struct hashmap paths;
318 static struct strbuf anon = STRBUF_INIT;
320 anonymize_path(&anon, path, &paths, anonymize_path_component);
321 print_path_1(anon.buf);
322 strbuf_reset(&anon);
/*
 * Generator for anonymize_mem(): returns a newly allocated, zeroed
 * 20-byte buffer whose last four bytes hold a running counter in
 * big-endian order.  *len is left untouched.
 */
static void *generate_fake_sha1(const void *old, size_t *len)
{
	static uint32_t counter = 1; /* avoid null sha1 */
	unsigned char *fake = xcalloc(20, 1);

	put_be32(&fake[16], counter);
	counter++;
	return fake;
}

334 static const unsigned char *anonymize_sha1(const unsigned char *sha1)
336 static struct hashmap sha1s;
337 size_t len = 20;
338 return anonymize_mem(&sha1s, generate_fake_sha1, sha1, &len);
341 static void show_filemodify(struct diff_queue_struct *q,
342 struct diff_options *options, void *data)
344 int i;
345 struct string_list *changed = data;
348 * Handle files below a directory first, in case they are all deleted
349 * and the directory changes to a file or symlink.
351 QSORT(q->queue, q->nr, depth_first);
353 for (i = 0; i < q->nr; i++) {
354 struct diff_filespec *ospec = q->queue[i]->one;
355 struct diff_filespec *spec = q->queue[i]->two;
357 switch (q->queue[i]->status) {
358 case DIFF_STATUS_DELETED:
359 printf("D ");
360 print_path(spec->path);
361 string_list_insert(changed, spec->path);
362 putchar('\n');
363 break;
365 case DIFF_STATUS_COPIED:
366 case DIFF_STATUS_RENAMED:
368 * If a change in the file corresponding to ospec->path
369 * has been observed, we cannot trust its contents
370 * because the diff is calculated based on the prior
371 * contents, not the current contents. So, declare a
372 * copy or rename only if there was no change observed.
374 if (!string_list_has_string(changed, ospec->path)) {
375 printf("%c ", q->queue[i]->status);
376 print_path(ospec->path);
377 putchar(' ');
378 print_path(spec->path);
379 string_list_insert(changed, spec->path);
380 putchar('\n');
382 if (!oidcmp(&ospec->oid, &spec->oid) &&
383 ospec->mode == spec->mode)
384 break;
386 /* fallthrough */
388 case DIFF_STATUS_TYPE_CHANGED:
389 case DIFF_STATUS_MODIFIED:
390 case DIFF_STATUS_ADDED:
392 * Links refer to objects in another repositories;
393 * output the SHA-1 verbatim.
395 if (no_data || S_ISGITLINK(spec->mode))
396 printf("M %06o %s ", spec->mode,
397 sha1_to_hex(anonymize ?
398 anonymize_sha1(spec->oid.hash) :
399 spec->oid.hash));
400 else {
401 struct object *object = lookup_object(spec->oid.hash);
402 printf("M %06o :%d ", spec->mode,
403 get_object_mark(object));
405 print_path(spec->path);
406 string_list_insert(changed, spec->path);
407 putchar('\n');
408 break;
410 default:
411 die("Unexpected comparison status '%c' for %s, %s",
412 q->queue[i]->status,
413 ospec->path ? ospec->path : "none",
414 spec->path ? spec->path : "none");
419 static const char *find_encoding(const char *begin, const char *end)
421 const char *needle = "\nencoding ";
422 char *bol, *eol;
424 bol = memmem(begin, end ? end - begin : strlen(begin),
425 needle, strlen(needle));
426 if (!bol)
427 return git_commit_encoding;
428 bol += strlen(needle);
429 eol = strchrnul(bol, '\n');
430 *eol = '\0';
431 return bol;
434 static void *anonymize_ref_component(const void *old, size_t *len)
436 static int counter;
437 struct strbuf out = STRBUF_INIT;
438 strbuf_addf(&out, "ref%d", counter++);
439 return strbuf_detach(&out, len);
442 static const char *anonymize_refname(const char *refname)
445 * If any of these prefixes is found, we will leave it intact
446 * so that tags remain tags and so forth.
448 static const char *prefixes[] = {
449 "refs/heads/",
450 "refs/tags/",
451 "refs/remotes/",
452 "refs/"
454 static struct hashmap refs;
455 static struct strbuf anon = STRBUF_INIT;
456 int i;
459 * We also leave "master" as a special case, since it does not reveal
460 * anything interesting.
462 if (!strcmp(refname, "refs/heads/master"))
463 return refname;
465 strbuf_reset(&anon);
466 for (i = 0; i < ARRAY_SIZE(prefixes); i++) {
467 if (skip_prefix(refname, prefixes[i], &refname)) {
468 strbuf_addstr(&anon, prefixes[i]);
469 break;
473 anonymize_path(&anon, refname, &refs, anonymize_ref_component);
474 return anon.buf;
/*
 * Return a newly allocated placeholder commit message with a running
 * number in its subject.
 *
 * Commit messages are not cached: they are unlikely to be repeated
 * verbatim, and it is not that interesting when they are.
 */
static char *anonymize_commit_message(const char *old)
{
	static int counter;
	char *replacement = xstrfmt("subject %d\n\nbody\n", counter);

	counter++;
	return replacement;
}

487 static struct hashmap idents;
488 static void *anonymize_ident(const void *old, size_t *len)
490 static int counter;
491 struct strbuf out = STRBUF_INIT;
492 strbuf_addf(&out, "User %d <user%d@example.com>", counter, counter);
493 counter++;
494 return strbuf_detach(&out, len);
498 * Our strategy here is to anonymize the names and email addresses,
499 * but keep timestamps intact, as they influence things like traversal
500 * order (and by themselves should not be too revealing).
502 static void anonymize_ident_line(const char **beg, const char **end)
504 static struct strbuf buffers[] = { STRBUF_INIT, STRBUF_INIT };
505 static unsigned which_buffer;
507 struct strbuf *out;
508 struct ident_split split;
509 const char *end_of_header;
511 out = &buffers[which_buffer++];
512 which_buffer %= ARRAY_SIZE(buffers);
513 strbuf_reset(out);
515 /* skip "committer", "author", "tagger", etc */
516 end_of_header = strchr(*beg, ' ');
517 if (!end_of_header)
518 die("BUG: malformed line fed to anonymize_ident_line: %.*s",
519 (int)(*end - *beg), *beg);
520 end_of_header++;
521 strbuf_add(out, *beg, end_of_header - *beg);
523 if (!split_ident_line(&split, end_of_header, *end - end_of_header) &&
524 split.date_begin) {
525 const char *ident;
526 size_t len;
528 len = split.mail_end - split.name_begin;
529 ident = anonymize_mem(&idents, anonymize_ident,
530 split.name_begin, &len);
531 strbuf_add(out, ident, len);
532 strbuf_addch(out, ' ');
533 strbuf_add(out, split.date_begin, split.tz_end - split.date_begin);
534 } else {
535 strbuf_addstr(out, "Malformed Ident <malformed@example.com> 0 -0000");
538 *beg = out->buf;
539 *end = out->buf + out->len;
542 static void handle_commit(struct commit *commit, struct rev_info *rev,
543 struct string_list *paths_of_changed_objects)
545 int saved_output_format = rev->diffopt.output_format;
546 const char *commit_buffer;
547 const char *author, *author_end, *committer, *committer_end;
548 const char *encoding, *message;
549 char *reencoded = NULL;
550 struct commit_list *p;
551 const char *refname;
552 int i;
554 rev->diffopt.output_format = DIFF_FORMAT_CALLBACK;
556 parse_commit_or_die(commit);
557 commit_buffer = get_commit_buffer(commit, NULL);
558 author = strstr(commit_buffer, "\nauthor ");
559 if (!author)
560 die ("Could not find author in commit %s",
561 oid_to_hex(&commit->object.oid));
562 author++;
563 author_end = strchrnul(author, '\n');
564 committer = strstr(author_end, "\ncommitter ");
565 if (!committer)
566 die ("Could not find committer in commit %s",
567 oid_to_hex(&commit->object.oid));
568 committer++;
569 committer_end = strchrnul(committer, '\n');
570 message = strstr(committer_end, "\n\n");
571 encoding = find_encoding(committer_end, message);
572 if (message)
573 message += 2;
575 if (commit->parents &&
576 get_object_mark(&commit->parents->item->object) != 0 &&
577 !full_tree) {
578 parse_commit_or_die(commit->parents->item);
579 diff_tree_sha1(commit->parents->item->tree->object.oid.hash,
580 commit->tree->object.oid.hash, "", &rev->diffopt);
582 else
583 diff_root_tree_sha1(commit->tree->object.oid.hash,
584 "", &rev->diffopt);
586 /* Export the referenced blobs, and remember the marks. */
587 for (i = 0; i < diff_queued_diff.nr; i++)
588 if (!S_ISGITLINK(diff_queued_diff.queue[i]->two->mode))
589 export_blob(diff_queued_diff.queue[i]->two->oid.hash);
591 refname = commit->util;
592 if (anonymize) {
593 refname = anonymize_refname(refname);
594 anonymize_ident_line(&committer, &committer_end);
595 anonymize_ident_line(&author, &author_end);
598 mark_next_object(&commit->object);
599 if (anonymize)
600 reencoded = anonymize_commit_message(message);
601 else if (!is_encoding_utf8(encoding))
602 reencoded = reencode_string(message, "UTF-8", encoding);
603 if (!commit->parents)
604 printf("reset %s\n", refname);
605 printf("commit %s\nmark :%"PRIu32"\n%.*s\n%.*s\ndata %u\n%s",
606 refname, last_idnum,
607 (int)(author_end - author), author,
608 (int)(committer_end - committer), committer,
609 (unsigned)(reencoded
610 ? strlen(reencoded) : message
611 ? strlen(message) : 0),
612 reencoded ? reencoded : message ? message : "");
613 free(reencoded);
614 unuse_commit_buffer(commit, commit_buffer);
616 for (i = 0, p = commit->parents; p; p = p->next) {
617 int mark = get_object_mark(&p->item->object);
618 if (!mark)
619 continue;
620 if (i == 0)
621 printf("from :%d\n", mark);
622 else
623 printf("merge :%d\n", mark);
624 i++;
627 if (full_tree)
628 printf("deleteall\n");
629 log_tree_diff_flush(rev);
630 string_list_clear(paths_of_changed_objects, 0);
631 rev->diffopt.output_format = saved_output_format;
633 printf("\n");
635 show_progress();
638 static void *anonymize_tag(const void *old, size_t *len)
640 static int counter;
641 struct strbuf out = STRBUF_INIT;
642 strbuf_addf(&out, "tag message %d", counter++);
643 return strbuf_detach(&out, len);
646 static void handle_tail(struct object_array *commits, struct rev_info *revs,
647 struct string_list *paths_of_changed_objects)
649 struct commit *commit;
650 while (commits->nr) {
651 commit = (struct commit *)commits->objects[commits->nr - 1].item;
652 if (has_unshown_parent(commit))
653 return;
654 handle_commit(commit, revs, paths_of_changed_objects);
655 commits->nr--;
659 static void handle_tag(const char *name, struct tag *tag)
661 unsigned long size;
662 enum object_type type;
663 char *buf;
664 const char *tagger, *tagger_end, *message;
665 size_t message_size = 0;
666 struct object *tagged;
667 int tagged_mark;
668 struct commit *p;
670 /* Trees have no identifier in fast-export output, thus we have no way
671 * to output tags of trees, tags of tags of trees, etc. Simply omit
672 * such tags.
674 tagged = tag->tagged;
675 while (tagged->type == OBJ_TAG) {
676 tagged = ((struct tag *)tagged)->tagged;
678 if (tagged->type == OBJ_TREE) {
679 warning("Omitting tag %s,\nsince tags of trees (or tags of tags of trees, etc.) are not supported.",
680 oid_to_hex(&tag->object.oid));
681 return;
684 buf = read_sha1_file(tag->object.oid.hash, &type, &size);
685 if (!buf)
686 die ("Could not read tag %s", oid_to_hex(&tag->object.oid));
687 message = memmem(buf, size, "\n\n", 2);
688 if (message) {
689 message += 2;
690 message_size = strlen(message);
692 tagger = memmem(buf, message ? message - buf : size, "\ntagger ", 8);
693 if (!tagger) {
694 if (fake_missing_tagger)
695 tagger = "tagger Unspecified Tagger "
696 "<unspecified-tagger> 0 +0000";
697 else
698 tagger = "";
699 tagger_end = tagger + strlen(tagger);
700 } else {
701 tagger++;
702 tagger_end = strchrnul(tagger, '\n');
703 if (anonymize)
704 anonymize_ident_line(&tagger, &tagger_end);
707 if (anonymize) {
708 name = anonymize_refname(name);
709 if (message) {
710 static struct hashmap tags;
711 message = anonymize_mem(&tags, anonymize_tag,
712 message, &message_size);
716 /* handle signed tags */
717 if (message) {
718 const char *signature = strstr(message,
719 "\n-----BEGIN PGP SIGNATURE-----\n");
720 if (signature)
721 switch(signed_tag_mode) {
722 case ABORT:
723 die ("Encountered signed tag %s; use "
724 "--signed-tags=<mode> to handle it.",
725 oid_to_hex(&tag->object.oid));
726 case WARN:
727 warning ("Exporting signed tag %s",
728 oid_to_hex(&tag->object.oid));
729 /* fallthru */
730 case VERBATIM:
731 break;
732 case WARN_STRIP:
733 warning ("Stripping signature from tag %s",
734 oid_to_hex(&tag->object.oid));
735 /* fallthru */
736 case STRIP:
737 message_size = signature + 1 - message;
738 break;
742 /* handle tag->tagged having been filtered out due to paths specified */
743 tagged = tag->tagged;
744 tagged_mark = get_object_mark(tagged);
745 if (!tagged_mark) {
746 switch(tag_of_filtered_mode) {
747 case ABORT:
748 die ("Tag %s tags unexported object; use "
749 "--tag-of-filtered-object=<mode> to handle it.",
750 oid_to_hex(&tag->object.oid));
751 case DROP:
752 /* Ignore this tag altogether */
753 return;
754 case REWRITE:
755 if (tagged->type != OBJ_COMMIT) {
756 die ("Tag %s tags unexported %s!",
757 oid_to_hex(&tag->object.oid),
758 typename(tagged->type));
760 p = (struct commit *)tagged;
761 for (;;) {
762 if (p->parents && p->parents->next)
763 break;
764 if (p->object.flags & UNINTERESTING)
765 break;
766 if (!(p->object.flags & TREESAME))
767 break;
768 if (!p->parents)
769 die ("Can't find replacement commit for tag %s\n",
770 oid_to_hex(&tag->object.oid));
771 p = p->parents->item;
773 tagged_mark = get_object_mark(&p->object);
777 if (starts_with(name, "refs/tags/"))
778 name += 10;
779 printf("tag %s\nfrom :%d\n%.*s%sdata %d\n%.*s\n",
780 name, tagged_mark,
781 (int)(tagger_end - tagger), tagger,
782 tagger == tagger_end ? "" : "\n",
783 (int)message_size, (int)message_size, message ? message : "");
786 static struct commit *get_commit(struct rev_cmdline_entry *e, char *full_name)
788 switch (e->item->type) {
789 case OBJ_COMMIT:
790 return (struct commit *)e->item;
791 case OBJ_TAG: {
792 struct tag *tag = (struct tag *)e->item;
794 /* handle nested tags */
795 while (tag && tag->object.type == OBJ_TAG) {
796 parse_object(tag->object.oid.hash);
797 string_list_append(&extra_refs, full_name)->util = tag;
798 tag = (struct tag *)tag->tagged;
800 if (!tag)
801 die("Tag %s points nowhere?", e->name);
802 return (struct commit *)tag;
803 break;
805 default:
806 return NULL;
810 static void get_tags_and_duplicates(struct rev_cmdline_info *info)
812 int i;
814 for (i = 0; i < info->nr; i++) {
815 struct rev_cmdline_entry *e = info->rev + i;
816 unsigned char sha1[20];
817 struct commit *commit;
818 char *full_name;
820 if (e->flags & UNINTERESTING)
821 continue;
823 if (dwim_ref(e->name, strlen(e->name), sha1, &full_name) != 1)
824 continue;
826 if (refspecs) {
827 char *private;
828 private = apply_refspecs(refspecs, refspecs_nr, full_name);
829 if (private) {
830 free(full_name);
831 full_name = private;
835 commit = get_commit(e, full_name);
836 if (!commit) {
837 warning("%s: Unexpected object of type %s, skipping.",
838 e->name,
839 typename(e->item->type));
840 continue;
843 switch(commit->object.type) {
844 case OBJ_COMMIT:
845 break;
846 case OBJ_BLOB:
847 export_blob(commit->object.oid.hash);
848 continue;
849 default: /* OBJ_TAG (nested tags) is already handled */
850 warning("Tag points to object of unexpected type %s, skipping.",
851 typename(commit->object.type));
852 continue;
856 * This ref will not be updated through a commit, lets make
857 * sure it gets properly updated eventually.
859 if (commit->util || commit->object.flags & SHOWN)
860 string_list_append(&extra_refs, full_name)->util = commit;
861 if (!commit->util)
862 commit->util = full_name;
866 static void handle_tags_and_duplicates(void)
868 struct commit *commit;
869 int i;
871 for (i = extra_refs.nr - 1; i >= 0; i--) {
872 const char *name = extra_refs.items[i].string;
873 struct object *object = extra_refs.items[i].util;
874 switch (object->type) {
875 case OBJ_TAG:
876 handle_tag(name, (struct tag *)object);
877 break;
878 case OBJ_COMMIT:
879 if (anonymize)
880 name = anonymize_refname(name);
881 /* create refs pointing to already seen commits */
882 commit = (struct commit *)object;
883 printf("reset %s\nfrom :%d\n\n", name,
884 get_object_mark(&commit->object));
885 show_progress();
886 break;
891 static void export_marks(char *file)
893 unsigned int i;
894 uint32_t mark;
895 struct object_decoration *deco = idnums.hash;
896 FILE *f;
897 int e = 0;
899 f = fopen_for_writing(file);
900 if (!f)
901 die_errno("Unable to open marks file %s for writing.", file);
903 for (i = 0; i < idnums.size; i++) {
904 if (deco->base && deco->base->type == 1) {
905 mark = ptr_to_mark(deco->decoration);
906 if (fprintf(f, ":%"PRIu32" %s\n", mark,
907 oid_to_hex(&deco->base->oid)) < 0) {
908 e = 1;
909 break;
912 deco++;
915 e |= ferror(f);
916 e |= fclose(f);
917 if (e)
918 error("Unable to write marks file %s.", file);
921 static void import_marks(char *input_file)
923 char line[512];
924 FILE *f = fopen(input_file, "r");
925 if (!f)
926 die_errno("cannot read '%s'", input_file);
928 while (fgets(line, sizeof(line), f)) {
929 uint32_t mark;
930 char *line_end, *mark_end;
931 unsigned char sha1[20];
932 struct object *object;
933 struct commit *commit;
934 enum object_type type;
936 line_end = strchr(line, '\n');
937 if (line[0] != ':' || !line_end)
938 die("corrupt mark line: %s", line);
939 *line_end = '\0';
941 mark = strtoumax(line + 1, &mark_end, 10);
942 if (!mark || mark_end == line + 1
943 || *mark_end != ' ' || get_sha1_hex(mark_end + 1, sha1))
944 die("corrupt mark line: %s", line);
946 if (last_idnum < mark)
947 last_idnum = mark;
949 type = sha1_object_info(sha1, NULL);
950 if (type < 0)
951 die("object not found: %s", sha1_to_hex(sha1));
953 if (type != OBJ_COMMIT)
954 /* only commits */
955 continue;
957 commit = lookup_commit(sha1);
958 if (!commit)
959 die("not a commit? can't happen: %s", sha1_to_hex(sha1));
961 object = &commit->object;
963 if (object->flags & SHOWN)
964 error("Object %s already has a mark", sha1_to_hex(sha1));
966 mark_object(object, mark);
968 object->flags |= SHOWN;
970 fclose(f);
973 static void handle_deletes(void)
975 int i;
976 for (i = 0; i < refspecs_nr; i++) {
977 struct refspec *refspec = &refspecs[i];
978 if (*refspec->src)
979 continue;
981 printf("reset %s\nfrom %s\n\n",
982 refspec->dst, sha1_to_hex(null_sha1));
986 int cmd_fast_export(int argc, const char **argv, const char *prefix)
988 struct rev_info revs;
989 struct object_array commits = OBJECT_ARRAY_INIT;
990 struct commit *commit;
991 char *export_filename = NULL, *import_filename = NULL;
992 uint32_t lastimportid;
993 struct string_list refspecs_list = STRING_LIST_INIT_NODUP;
994 struct string_list paths_of_changed_objects = STRING_LIST_INIT_DUP;
995 struct option options[] = {
996 OPT_INTEGER(0, "progress", &progress,
997 N_("show progress after <n> objects")),
998 OPT_CALLBACK(0, "signed-tags", &signed_tag_mode, N_("mode"),
999 N_("select handling of signed tags"),
1000 parse_opt_signed_tag_mode),
1001 OPT_CALLBACK(0, "tag-of-filtered-object", &tag_of_filtered_mode, N_("mode"),
1002 N_("select handling of tags that tag filtered objects"),
1003 parse_opt_tag_of_filtered_mode),
1004 OPT_STRING(0, "export-marks", &export_filename, N_("file"),
1005 N_("Dump marks to this file")),
1006 OPT_STRING(0, "import-marks", &import_filename, N_("file"),
1007 N_("Import marks from this file")),
1008 OPT_BOOL(0, "fake-missing-tagger", &fake_missing_tagger,
1009 N_("Fake a tagger when tags lack one")),
1010 OPT_BOOL(0, "full-tree", &full_tree,
1011 N_("Output full tree for each commit")),
1012 OPT_BOOL(0, "use-done-feature", &use_done_feature,
1013 N_("Use the done feature to terminate the stream")),
1014 OPT_BOOL(0, "no-data", &no_data, N_("Skip output of blob data")),
1015 OPT_STRING_LIST(0, "refspec", &refspecs_list, N_("refspec"),
1016 N_("Apply refspec to exported refs")),
1017 OPT_BOOL(0, "anonymize", &anonymize, N_("anonymize output")),
1018 OPT_END()
1021 if (argc == 1)
1022 usage_with_options (fast_export_usage, options);
1024 /* we handle encodings */
1025 git_config(git_default_config, NULL);
1027 init_revisions(&revs, prefix);
1028 revs.topo_order = 1;
1029 revs.show_source = 1;
1030 revs.rewrite_parents = 1;
1031 argc = parse_options(argc, argv, prefix, options, fast_export_usage,
1032 PARSE_OPT_KEEP_ARGV0 | PARSE_OPT_KEEP_UNKNOWN);
1033 argc = setup_revisions(argc, argv, &revs, NULL);
1034 if (argc > 1)
1035 usage_with_options (fast_export_usage, options);
1037 if (refspecs_list.nr) {
1038 const char **refspecs_str;
1039 int i;
1041 ALLOC_ARRAY(refspecs_str, refspecs_list.nr);
1042 for (i = 0; i < refspecs_list.nr; i++)
1043 refspecs_str[i] = refspecs_list.items[i].string;
1045 refspecs_nr = refspecs_list.nr;
1046 refspecs = parse_fetch_refspec(refspecs_nr, refspecs_str);
1048 string_list_clear(&refspecs_list, 1);
1049 free(refspecs_str);
1052 if (use_done_feature)
1053 printf("feature done\n");
1055 if (import_filename)
1056 import_marks(import_filename);
1057 lastimportid = last_idnum;
1059 if (import_filename && revs.prune_data.nr)
1060 full_tree = 1;
1062 get_tags_and_duplicates(&revs.cmdline);
1064 if (prepare_revision_walk(&revs))
1065 die("revision walk setup failed");
1066 revs.diffopt.format_callback = show_filemodify;
1067 revs.diffopt.format_callback_data = &paths_of_changed_objects;
1068 DIFF_OPT_SET(&revs.diffopt, RECURSIVE);
1069 while ((commit = get_revision(&revs))) {
1070 if (has_unshown_parent(commit)) {
1071 add_object_array(&commit->object, NULL, &commits);
1073 else {
1074 handle_commit(commit, &revs, &paths_of_changed_objects);
1075 handle_tail(&commits, &revs, &paths_of_changed_objects);
1079 handle_tags_and_duplicates();
1080 handle_deletes();
1082 if (export_filename && lastimportid != last_idnum)
1083 export_marks(export_filename);
1085 if (use_done_feature)
1086 printf("done\n");
1088 free_refspec(refspecs_nr, refspecs);
1090 return 0;