git-export-filter.c: avoid extra LFs and grok get-mark
[git-export-filter.git] / git-export-filter.c
blob4cd6a78e5fb171da25f8a6b46475c9d9dc80fd72
1 /*
3 git-export-filter.c -- filter/transform git fast-export data streams
4 Copyright (C) 2013,2014,2019 Kyle J. McKay. All rights reserved.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License along
17 with this program; if not, write to the Free Software Foundation, Inc.,
18 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
22 #include <stdarg.h>
23 #include <stddef.h>
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <string.h>
/* Longest input or mapping-file line accepted, not counting the line ending. */
#define MAXLINE 2047
/* Size in bytes of the buffer used to copy raw "data" payloads. */
#define COPYSIZE 65536

/* Characters treated as blanks when skipping or trimming white space. */
#define SPACETAB " \t"
/* Decimal digits, used with strspn() to recognise numeric fields. */
#define DIGITS "0123456789"
/*
 * One mapping entry loaded from an authors or branches file:
 * `from` is the lookup key and `to` is the text that replaces it.
 */
typedef struct transform_s {
	char *from;
	char *to;
} transform_t;
39 static const char *const gHelp =
40 #include "git-export-help.inc"
43 static const char *const gUsage =
44 "git-export-filter [--authors-file file] [--branches-file file]\n"
45 " [--convert-tagger id] [--require-authors] [--trunk-is-master]\n"
46 " [--strip-at-suffix] [--expand-renames] [in] > out\n"
47 "(use git-export-filter -v -h for detailed help)\n";
49 static const char *const gVersion =
50 "git-export-filter version 1.5.1\n";
52 static const char *me = "git-export-filter";
54 static const char *authorsfile = NULL;
55 static const char *branchesfile = NULL;
56 static const char *convertid = NULL;
57 static int opt_verbose = 0;
58 static int debug = 0;
59 static int opt_version = 0;
60 static int opt_help = 0;
61 static int opt_require = 0;
62 static int opt_trunk_is_master = 0;
63 static int opt_strip_at = 0;
64 static int opt_no_renames = 0;
65 static int opt_names = 0;
67 static char *pushline = NULL;
68 static char *copybuff;
70 int (*fout)(FILE *out, const char *fmt, ...);
71 size_t (*writeout)(const void *ptr, size_t size, size_t nitems, FILE *out);
73 static void processfile(FILE *in, FILE *out,
74 const transform_t *authors, size_t acount,
75 const transform_t *branches, size_t bcount,
76 const char *convertid);
78 static void setupme(const char *start)
80 if (start && *start) {
81 const char *last = strrchr(start, '/');
82 if (last && last[1])
83 me = last+1;
84 else
85 me = start;
89 static void die(const char *fmt, ...)
91 va_list args;
92 size_t len;
94 if (!fmt)
95 fmt="error";
96 va_start(args, fmt);
97 fflush(stdout);
98 fprintf(stderr, "%s: ", me);
99 vfprintf(stderr, fmt, args);
100 va_end(args);
101 len = strlen(fmt);
102 if (!len || fmt[len-1] != '\n')
103 fprintf(stderr, "\n");
104 fflush(stderr);
105 exit(1);
108 static int cmpxform(const void *_a, const void *_b)
110 const transform_t *a = (const transform_t *)_a;
111 const transform_t *b = (const transform_t *)_b;
112 return strcmp(a->from, b->from);
/*
 * Remove, in place, every trailing character of `str` that appears in
 * `chars`.  A NULL `str` or `chars` is ignored.
 *
 * The length is checked before each step so that an empty string, or a
 * string made up only of trimmable characters, never reads before the
 * start of the buffer.
 */
static void trimback(char *str, const char *chars)
{
	size_t len;

	if (!str || !chars)
		return;
	len = strlen(str);
	while (len > 0 && strchr(chars, str[len - 1]))
		str[--len] = 0;
}
124 static int read_transform_file(const char *type, FILE *f, transform_t **ans)
126 transform_t *xform = NULL;
127 int cnt = 0;
128 char fmt[16];
129 char line[MAXLINE+2];
130 sprintf(fmt, "%%%d[^\r\n]", MAXLINE+1);
131 for (;;) {
132 int e;
133 line[0] = 0;
134 e = fscanf(f, fmt, line);
135 if (e < 0) break;
136 if (e == 1) {
137 char *str;
138 if (strlen(line) > MAXLINE)
139 die("%s file line exceeded %d characters", type, MAXLINE);
140 str = line + strspn(line, SPACETAB);
141 if (*str && *str != '#') {
142 size_t idlen = strcspn(str, "=");
143 char *from;
144 if (!str[idlen])
145 die("invalid %s line (no '='): %s", type, str);
146 if (!idlen)
147 die("invalid %s line (empty before '='): %s", type, str);
148 str[idlen] = 0;
149 from = str + idlen + 1;
150 from += strspn(from, SPACETAB);
151 if (!*from)
152 die("invalid %s line (empty after '='): %s", type, str);
153 trimback(str, SPACETAB);
154 trimback(from, SPACETAB);
155 if (debug > 1)
156 fprintf(stderr, "FROM: %s TO: %s\n", str, from);
157 xform = realloc(xform, sizeof(transform_t) * (cnt + 1));
158 if (!xform)
159 die("out of memory allocating %s array", type);
160 xform[cnt].from = (char *)malloc(idlen + 1 + strlen(from) + 1);
161 if (!xform[cnt].from)
162 die("out of memory allocating %s array", type);
163 memcpy(xform[cnt].from, str, idlen + 1);
164 xform[cnt].to = xform[cnt].from + idlen + 1;
165 strcpy(xform[cnt].to, from);
166 ++cnt;
169 e = fscanf(f, "%*[\r\n]");
170 if (e < 0) break;
172 if (feof(f)) {
173 qsort(xform, cnt, sizeof(transform_t), cmpxform);
174 *ans = xform;
175 return cnt;
177 if (xform)
178 free(xform);
179 return -1;
/*
 * Stand-in for fprintf() installed as `fout` under --names:
 * ignores its arguments, writes nothing and reports 0 characters.
 */
static int foutnone(FILE *out, const char *fmt, ...)
{
	(void)fmt;
	(void)out;
	return 0;
}
/*
 * Stand-in for fwrite() installed as `writeout` under --names:
 * writes nothing and reports every item as written.
 */
size_t writeoutnone(const void *ptr, size_t size, size_t nitems, FILE *out)
{
	(void)out;
	(void)size;
	(void)ptr;
	return nitems;
}
197 int main(int argc, char *argv[])
199 transform_t *authors = NULL;
200 int acount = 0;
201 transform_t *branches = NULL;
202 int bcount = 0;
203 FILE *inbinary = freopen(NULL, "rb", stdin);
204 FILE *outbinary = freopen(NULL, "wb", stdout);
205 int optind = 1;
207 fout = fprintf;
208 writeout = fwrite;
209 if (argc >= 1)
210 setupme(argv[0]);
211 copybuff = (char *)malloc(COPYSIZE);
212 if (!copybuff)
213 die("out of memory allocating copy buffer");
214 if (!inbinary)
215 die("freopen(NULL, \"rb\", stdin) failed");
216 if (!outbinary)
217 die("freopen(NULL, \"wb\", stdout) failed");
218 for (; optind < argc; ++optind) {
219 #define A argv[optind]
220 if (strcmp(A, "--authors-file") == 0 || strcmp(A, "-A") == 0) {
221 if (++optind >= argc || !A || !*A)
222 die("--authors-file requires a filename argument");
223 authorsfile = A;
224 continue;
226 if (strncmp(A, "--authors-file=", 15) == 0) {
227 const char *arg = A + 15;
228 if (!*arg)
229 die("--authors-file requires a filename argument");
230 authorsfile = arg;
231 continue;
233 if (strcmp(A, "--branches-file") == 0) {
234 if (++optind >= argc || !A || !*A)
235 die("--branches-file requires a filename argument");
236 branchesfile = A;
237 continue;
239 if (strncmp(A, "--branches-file=", 16) == 0) {
240 const char *arg = A + 16;
241 if (!*arg)
242 die("--branches-file requires a filename argument");
243 branchesfile = arg;
244 continue;
246 if (strcmp(A, "--convert-tagger") == 0) {
247 if (convertid)
248 die("--convert-tagger may only be given once");
249 if (++optind >= argc || !A || !*A)
250 die("--convert-tagger requires an argument");
251 convertid = A;
252 continue;
254 if (strncmp(A, "--convert-tagger=", 17) == 0) {
255 const char *arg = A + 17;
256 if (convertid)
257 die("--convert-tagger may only be given once");
258 if (!*arg)
259 die("--convert-tagger requires an argument");
260 convertid = A;
261 continue;
263 if (!strcmp(A, "--require-authors")) {
264 opt_require = 1;
265 continue;
267 if (!strcmp(A, "--trunk-is-master")) {
268 opt_trunk_is_master = 1;
269 continue;
271 if (!strcmp(A, "--strip-at-suffix")) {
272 opt_strip_at = 1;
273 continue;
275 if (!strcmp(A, "--expand-renames")) {
276 opt_no_renames = 1;
277 continue;
279 if (!strcmp(A, "--names")) {
280 opt_names = 1;
281 fout = foutnone;
282 writeout = writeoutnone;
283 continue;
285 if (!strcmp(A, "-V") || !strcmp(A, "--version")) {
286 opt_version = 1;
287 continue;
289 if (!strcmp(A, "-v") || !strcmp(A, "--verbose")) {
290 opt_verbose = 1;
291 continue;
293 if (!strcmp(A, "-h") || !strcmp(A, "--help")) {
294 opt_help = 1;
295 continue;
297 if (!strcmp(A, "-d") || !strcmp(A, "--debug")) {
298 ++debug;
299 continue;
301 if (strcmp(A, "--") == 0) {
302 ++optind;
303 break;
305 if (*A != '-' || !A[1])
306 break;
307 die("unknown option: %s", A);
308 #undef A
310 if (optind + 1 < argc)
311 die("no more than one non-option argument allowed (try -h)");
312 if (optind + 1 == argc && strcmp(argv[optind], "-")) {
313 inbinary = freopen(argv[optind], "rb", inbinary);
314 if (!inbinary)
315 die("cannot open file %s", argv[optind]);
317 if (opt_version)
318 printf("%s", gVersion);
319 if (opt_help)
320 printf("%s", opt_verbose ? gHelp : gUsage);
321 if (opt_version || opt_help)
322 exit(0);
323 if (opt_require && !authorsfile)
324 die("--require-authors requires the --authors-file option");
325 if (opt_names && (opt_require || authorsfile || branchesfile || convertid ||
326 opt_trunk_is_master || opt_strip_at || opt_no_renames))
327 die("--names may not be used together with any other options");
328 if (authorsfile) {
329 FILE *af = fopen(authorsfile, "rb");
330 if (!af)
331 die("cannot open authors file: %s", authorsfile);
332 acount = read_transform_file("authors", af, &authors);
333 fclose(af);
334 if (acount < 0)
335 die("invalid authors file format: %s", authorsfile);
336 if (debug && acount) {
337 int i;
338 for (i=0; i<acount; ++i)
339 fprintf(stderr, "%s=%s\n", authors[i].from, authors[i].to);
342 if (branchesfile) {
343 FILE *bf = fopen(branchesfile, "rb");
344 if (!bf)
345 die("cannot open branches file: %s", branchesfile);
346 bcount = read_transform_file("branches", bf, &branches);
347 fclose(bf);
348 if (bcount < 0)
349 die("invalid branches file format: %s", branchesfile);
350 if (debug && bcount) {
351 int i;
352 for (i=0; i<bcount; ++i)
353 fprintf(stderr, "%s->%s\n", branches[i].from, branches[i].to);
356 if (opt_trunk_is_master) {
357 branches = realloc(branches, sizeof(transform_t) * (bcount + 1));
358 if (!branches)
359 die("out of memory allocating branches array");
360 branches[bcount].from = "refs/heads/trunk";
361 branches[bcount].to = "refs/heads/master";
362 ++bcount;
363 qsort(branches, bcount, sizeof(transform_t), cmpxform);
366 processfile(inbinary, outbinary, authors, (size_t)acount, branches,
367 (size_t)bcount, convertid);
369 exit(0);
372 static char *nextline(FILE *in)
374 static char line[MAXLINE+2];
375 char fmt[16];
376 int e;
378 if (pushline) {
379 char *ans = pushline;
380 pushline = NULL;
381 if (*ans)
382 return ans;
384 sprintf(fmt, "%%%d[^\r\n]", MAXLINE+1);
385 line[0] = 0;
386 e = fscanf(in, fmt, line);
387 if (e < 0 && !feof(in))
388 die("error reading input");
389 if (e < 0)
390 return NULL;
391 if (strlen(line) > MAXLINE)
392 die("input line exceeded %d characters", MAXLINE);
393 do {
394 e = fgetc(in);
395 } while (e >= 0 && e != '\n');
396 if (e < 0 && !feof(in))
397 die("error reading input");
398 return line + strspn(line, SPACETAB);
401 static void processblob(FILE *in, FILE *out);
402 static void processcommit(FILE *in, FILE *out, const char *ref,
403 const transform_t *authors, size_t acount,
404 const transform_t *branches, size_t bcount);
405 static void processtag(FILE *in, FILE *out, const char *tag,
406 const transform_t *authors, size_t acount,
407 const transform_t *branches, size_t bcount,
408 const char *convertid);
409 static void processreset(FILE *in, FILE *out, const char *ref,
410 const transform_t *branches, size_t bcount);
412 static void processfile(FILE *in, FILE *out,
413 const transform_t *authors, size_t acount,
414 const transform_t *branches, size_t bcount,
415 const char *convertid)
417 const char *line;
418 for (;;) {
419 line = nextline(in);
420 if (!line) break;
421 if (!*line || *line == '#') continue;
422 if (strcmp(line, "blob") == 0)
423 processblob(in, out);
424 else if (strncmp(line, "commit ", 7) == 0)
425 processcommit(in, out, line+7, authors, acount, branches, bcount);
426 else if (strncmp(line, "tag ", 4) == 0)
427 processtag(in, out, line+4, authors, acount, branches, bcount, convertid);
428 else if (strncmp(line, "reset ", 6) == 0)
429 processreset(in, out, line+6, branches, bcount);
430 else if (!strcmp(line, "checkpoint") || !strcmp(line, "done") ||
431 !strncmp(line, "progress ", 9))
432 fout(out, "%s\n\n", line);
433 else if (!strncmp(line, "get-mark ", 9) || !strncmp(line, "cat-blob ", 9) ||
434 !strncmp(line, "ls ", 3) || !strncmp(line, "feature ", 8) ||
435 !strncmp(line, "option ", 7))
436 fout(out, "%s\n", line);
437 else
438 die("unrecognized input command: %s", line);
442 static const char *translate(const char *in, const transform_t *x, size_t c)
444 const transform_t *t;
445 transform_t search;
446 search.from = (char *)in;
447 search.to = NULL;
448 t = (transform_t *)bsearch(&search, x, c, sizeof(transform_t), cmpxform);
449 return t ? t->to : in;
452 #define translateref(i,x,c) translate((i),(x),(c))
454 static const char *translateuser(const char *in, const transform_t *x, size_t c)
456 const transform_t *t;
457 transform_t search;
458 search.from = (char *)in;
459 search.to = NULL;
460 t = (transform_t *)bsearch(&search, x, c, sizeof(transform_t), cmpxform);
461 if (!t && opt_strip_at && strchr(in, '@')) {
462 char user[MAXLINE+1];
463 strcpy(user, in);
464 *strchr(user, '@') = 0;
465 search.from = user;
466 t = (transform_t *)bsearch(&search, x, c, sizeof(transform_t), cmpxform);
468 return t ? t->to : in;
471 static const char *translatetag(const char *in, const transform_t *x, size_t c)
473 const transform_t *t;
474 transform_t search = {NULL, NULL};
475 char *temptag = malloc(10 + strlen(in) + 1);
476 if (!temptag)
477 die("out of memory");
478 sprintf(temptag, "refs/tags/%s", in);
479 search.from = temptag;
480 t = (transform_t *)bsearch(&search, x, c, sizeof(transform_t), cmpxform);
481 if (!t || strncmp(t->to, "refs/tags/", 10) != 0)
482 return in;
483 return t->to + 10;
/*
 * Split "Name <email> when" in place.
 *
 * On success the first '<' and the first '>' are overwritten with NULs,
 * *name points at the text before '<' (any trailing blank is kept),
 * *email at the text between the brackets and *when at the text after
 * '>', skipping one space if present; 1 is returned.
 * Returns 0, leaving `line` and the outputs untouched, when either bracket
 * is missing or the first '>' does not come after the first '<'.
 */
static int splitauthor(char *line, char **name, char **email, char **when)
{
	char *langle = strchr(line, '<');
	char *rangle = strchr(line, '>');

	if (langle == NULL || rangle == NULL || rangle <= langle)
		return 0;

	*langle = '\0';
	*rangle = '\0';
	*name = line;
	*email = langle + 1;
	*when = (rangle[1] == ' ') ? rangle + 2 : rangle + 1;
	return 1;
}
502 static void copydatapart(FILE *in, FILE *out, const char *data, int nolf)
504 char *line;
505 char lastchar = 0;
506 size_t l = strlen(data);
507 if (strspn(data, DIGITS) == l) {
508 size_t dlen;
509 if (!l)
510 die("Invalid data line: data %s", data);
511 dlen = (size_t)strtol(data, NULL, 10);
512 while (dlen) {
513 size_t cnt, amnt = COPYSIZE;
514 if (amnt > dlen)
515 amnt = dlen;
516 cnt = fread(copybuff, 1, amnt, in);
517 if (!cnt)
518 break;
519 if (out)
520 if (writeout(copybuff, cnt, 1, out) != 1)
521 die("failed writing data to output");
522 if (cnt == dlen)
523 lastchar = copybuff[cnt - 1];
524 dlen -= cnt;
526 if (dlen)
527 die("unexpected EOF reading data %s", data);
528 if (out && (!nolf || lastchar != '\n'))
529 fout(out, "\n");
530 } else if (l < 3 || data[0] != '<' || data[1] != '<')
531 die("Invalid data line: data %s", data);
532 else {
533 fprintf(stderr, "%s: warning: data << not fully supported\n", me);
534 for (;;) {
535 const char *line = nextline(in);
536 if (!line)
537 die("unexpected EOF reading data %s", data);
538 if (out)
539 fout(out, "%s\n", line);
540 if (strcmp(line, data+2) == 0) {
541 if (out && !nolf)
542 fout(out, "\n");
543 break;
547 line = nextline(in);
548 if (line && *line)
549 pushline = line;
/*
 * Read the next line, which must be a "data ..." header, echo it to `out`
 * (unless `out` is NULL) and copy its payload with copydatapart().
 *
 * err  message for die() when the header is missing; NULL selects
 *      "missing data line".
 */
static void copydata(FILE *in, FILE *out, int nolf, const char *err)
{
	const char *line = nextline(in);

	if (!err)
		err = "missing data line";
	if (!line || strncmp(line, "data ", 5) != 0)
		die("%s", err);	/* never use a caller-supplied string as the format */
	if (out)
		fout(out, "%s\n", line);
	copydatapart(in, out, line+5, nolf);
}
/*
 * Handle a "blob" command: echo the command, an optional "mark" line and
 * the "data" header, then copy the payload unchanged.  A missing header
 * line ends the program through die().
 */
static void processblob(FILE *in, FILE *out)
{
	const char *hdr = nextline(in);

	if (hdr == NULL)
		die("error reading blob header");
	fout(out, "blob\n");

	if (!strncmp(hdr, "mark ", 5)) {
		fout(out, "%s\n", hdr);
		hdr = nextline(in);
		if (hdr == NULL)
			die("error reading blob header");
	}

	if (strncmp(hdr, "data ", 5))
		die("blob missing data line");
	fout(out, "%s\n", hdr);
	copydatapart(in, out, hdr+5, 0);
}
582 static void processreset(FILE *in, FILE *out, const char *ref,
583 const transform_t *branches, size_t bcount)
585 char *line;
586 const char *newref = translateref(ref, branches, bcount);
587 fout(out, "reset %s\n", newref);
588 line = nextline(in);
589 if (strncmp(line, "from ", 5) == 0)
590 fout(out, "%s\n\n", line);
591 else
592 pushline = line;
595 static void processtag(FILE *in, FILE *out, const char *tag,
596 const transform_t *authors, size_t acount,
597 const transform_t *branches, size_t bcount,
598 const char *convertid)
600 char *name = NULL, *email = NULL, *when = NULL;
601 char *line;
602 const char *newref = translatetag(tag, branches, bcount);
603 const char *newauth;
604 char tagline[MAXLINE+1];
605 char fromline[MAXLINE+1];
606 sprintf(tagline, "tag %s\n", newref);
607 line = nextline(in);
608 if (!line || strncmp(line, "from ", 5) != 0)
609 die("tag missing from line");
610 sprintf(fromline, "%s\n", line);
611 line = nextline(in);
612 if (!line || strncmp(line, "tagger ", 7) != 0)
613 die("tag missing tagger line");
614 if (!splitauthor(line+7, &name, &email, &when))
615 die("tag has bad tagger line");
616 if (opt_names)
617 fprintf(out, "%s<%s>\n", name, email);
618 if (convertid && strcmp(convertid, email) == 0) {
619 fout(out, "reset refs/tags/%s", tagline+4);
620 fout(out, "%s\n", fromline);
621 out = NULL;
622 } else {
623 fout(out, "%s%s", tagline, fromline);
624 newauth = translateuser(email, authors, acount);
625 if (newauth != email)
626 fout(out, "tagger %s %s\n", newauth, when);
627 else {
628 if (opt_require)
629 die("missing authors file author: \"%s\"", email);
630 fout(out, "tagger %s<%s> %s\n", name, email, when);
633 copydata(in, out, 0, "tag missing data line");
/*
 * Given a file-change line of the form "X <path> <rest>", where X is a
 * single character and <path> may be a double-quoted string with backslash
 * escapes, return a pointer to the space that ends <path>.
 * Returns NULL when the line does not have that shape: NULL or empty
 * input, no single space after X, an empty path, no second space, an
 * unterminated quoted path or a quoted path not followed by a space.
 */
static const char *find_second_space(const char *line)
{
	const char *p;

	if (!line || !*line || *line == ' ' || line[1] != ' ' || line[2] == ' ')
		return NULL;

	/* Unquoted path: it simply runs up to the next space. */
	if (line[2] != '"')
		return strchr(line + 2, ' ');

	/* Quoted path: walk to the closing quote, stepping over escaped characters. */
	for (p = line + 3; ; p += 2) {
		p += strcspn(p, "\\\"");
		if (*p == '\0')
			return NULL;
		if (*p == '"')
			return p[1] == ' ' ? p + 1 : NULL;
		/* *p is a backslash; it must be followed by the character it escapes. */
		if (p[1] == '\0')
			return NULL;
	}
}
657 static int is_inline_modify(const char *line)
659 if (strncmp(line, "M ", 2) != 0)
660 return 0;
661 line += 2;
662 line += strspn(line, DIGITS);
663 return strncmp(line, " inline ", 8) == 0;
666 static void processcommit(FILE *in, FILE *out, const char *ref,
667 const transform_t *authors, size_t acount,
668 const transform_t *branches, size_t bcount)
670 char *name = NULL, *email = NULL, *when = NULL;
671 char *line;
672 const char *newref = translateref(ref, branches, bcount);
673 const char *newauth;
674 fout(out, "commit %s\n", newref);
675 line = nextline(in);
676 if (!line)
677 die("error reading commit header");
678 if (strncmp(line, "mark ", 5) == 0) {
679 fout(out, "%s\n", line);
680 line = nextline(in);
681 if (!line)
682 die("error reading commit header");
684 if (strncmp(line, "author ", 7) == 0) {
685 if (!splitauthor(line+7, &name, &email, &when))
686 die("commit has bad author line");
687 if (opt_names)
688 fprintf(out, "%s<%s>\n", name, email);
689 newauth = translateuser(email, authors, acount);
690 if (newauth != email)
691 fout(out, "author %s %s\n", newauth, when);
692 else {
693 if (opt_require)
694 die("missing authors file author: \"%s\"", email);
695 fout(out, "author %s<%s> %s\n", name, email, when);
697 line = nextline(in);
698 if (!line)
699 die("error reading commit header");
701 if (strncmp(line, "committer ", 10) != 0)
702 die("commit missing committer line");
703 if (!splitauthor(line+10, &name, &email, &when))
704 die("commit has bad committer line");
705 if (opt_names)
706 fprintf(out, "%s<%s>\n", name, email);
707 newauth = translateuser(email, authors, acount);
708 if (newauth != email)
709 fout(out, "committer %s %s\n", newauth, when);
710 else {
711 if (opt_require)
712 die("missing authors file author: \"%s\"", email);
713 fout(out, "committer %s<%s> %s\n", name, email, when);
715 copydata(in, out, 1, "commit missing data line");
716 line = nextline(in);
717 if (!line)
718 die("error reading commit header");
719 if (strncmp(line, "from ", 5) == 0) {
720 fout(out, "%s\n", line);
721 line = nextline(in);
722 if (!line)
723 die("error reading commit header");
725 while (strncmp(line, "merge ", 6) == 0) {
726 fout(out, "%s\n", line);
727 line = nextline(in);
728 if (!line)
729 die("error reading commit header");
731 while (!strcmp(line, "deleteall") || !strncmp(line, "M ", 2) ||
732 !strncmp(line, "D ", 2) || !strncmp(line, "C ", 2) ||
733 !strncmp(line, "R ", 2) || !strncmp(line, "N ", 2) ||
734 !strncmp(line, "cat-blob ", 9) || !strncmp(line, "ls ", 3) ||
735 !strncmp(line, "get-mark ", 9)) {
736 if (!opt_no_renames || strncmp(line, "R ", 2)) {
737 fout(out, "%s\n", line);
738 } else {
739 /* expand rename into copy + delete */
740 const char *space2 = find_second_space(line);
741 if (!space2)
742 die("error reading 'R' line");
743 fout(out, "C %s\n", line+2);
744 fout(out, "D %.*s\n", (int)(space2 - line) - 2, line+2);
746 if (strncmp(line, "N inline", 8) == 0 || is_inline_modify(line))
747 copydata(in, out, 1, "inline N or M missing data line");
748 line = nextline(in);
749 if (!line)
750 die("error reading commit header");
752 pushline = line;
753 fout(out, "\n");