Now fully implemented and operational
[git-export-filter.git] / git-export-filter.c
blobd559f58b64cf2380ccfd6e3d4438b2daf5b63ec7
1 /* git-export-filter.c -- filter/transform git export streams
2 Copyright (c) 2013 Kyle J. McKay. All rights reserved.
3 */
5 #include <stdarg.h>
6 #include <stddef.h>
7 #include <stdio.h>
8 #include <stdlib.h>
9 #include <string.h>
/* Longest input line accepted, in characters; line buffers in this file are
   declared with MAXLINE+2 elements. */
11 #define MAXLINE 2047
/* Size in bytes of the buffer (copybuff) used to copy counted data sections. */
12 #define COPYSIZE 65536
/* Characters skipped at the start of lines and trimmed from mapping fields. */
14 #define SPACETAB " \t"
/* Characters that may make up the byte count of a "data" line. */
15 #define DIGITS "0123456789"
/* One "from=to" mapping read from an authors or branches file.  Arrays of
   these are sorted and searched by the `from` string. */
17 typedef struct transform_s {
/* Lookup key (the text before '='). */
18 char *from;
/* Replacement text (the text after '='); stored in the same allocation as
   `from`, immediately after its terminating NUL. */
19 char *to;
20 } transform_t;
/* Detailed help text; its contents come from the included file.  Printed by
   main when both the help and verbose options are given. */
22 static const char *const gHelp =
23 #include "git-export-help.inc"
/* Short usage summary printed by main for the help option without verbose. */
26 static const char *const gUsage =
27 "git-export-filter [--authors file] [--branches file] [--convert-tagger id]\n"
28 " (in | < in) > out\n"
29 "(use git-export-filter -v -h for detailed help)\n";
/* Version banner printed by main when -V/--version is given. */
31 static const char *const gVersion =
32 "git-export-filter version 1.0\n";
/* Program name used as the prefix of diagnostics; replaced by setupme(). */
34 static const char *me = "git-export-filter";
/* Option values and flags filled in by main while parsing the command line. */
36 static const char *authorsfile = NULL;
37 static const char *branchesfile = NULL;
38 static const char *convertid = NULL;
39 static int opt_verbose = 0;
/* Incremented once for every -d/--debug on the command line. */
40 static int debug = 0;
41 static int opt_version = 0;
42 static int opt_help = 0;
/* A line handed back by a parser; nextline() returns it (when non-empty)
   before reading any further input. */
44 static char *pushline = NULL;
/* Buffer of COPYSIZE bytes allocated in main and used by copydatapart(). */
45 static char *copybuff;
/* Forward declaration: main calls processfile before its definition. */
47 static void processfile(FILE *in, FILE *out,
48 const transform_t *authors, size_t acount,
49 const transform_t *branches, size_t bcount,
50 const char *convertid);
52 static void setupme(const char *start)
54 if (start && *start) {
55 const char *last = strrchr(start, '/');
56 if (last && last[1])
57 me = last+1;
58 else
59 me = start;
63 static void die(const char *fmt, ...)
65 va_list args;
66 size_t len;
68 if (!fmt)
69 fmt="error";
70 va_start(args, fmt);
71 fflush(stdout);
72 fprintf(stderr, "%s: ", me);
73 vfprintf(stderr, fmt, args);
74 va_end(args);
75 len = strlen(fmt);
76 if (!len || fmt[len-1] != '\n')
77 fprintf(stderr, "\n");
78 fflush(stderr);
79 exit(1);
82 static int cmpxform(const void *_a, const void *_b)
84 const transform_t *a = (const transform_t *)_a;
85 const transform_t *b = (const transform_t *)_b;
86 return strcmp(a->from, b->from);
/* Remove trailing characters from a string in place.
 *
 * str:   NUL-terminated string to shorten; NULL is ignored.
 * chars: set of characters to strip from the end; NULL is ignored.
 *
 * The length is checked before every step so that an empty string, or one
 * made up entirely of characters from `chars`, is reduced to "" without ever
 * touching the byte in front of the buffer.
 */
static void trimback(char *str, const char *chars)
{
    size_t len;

    if (!str || !chars)
        return;
    len = strlen(str);
    while (len > 0 && strchr(chars, str[len - 1]))
        str[--len] = '\0';
}
98 static int read_transform_file(const char *type, FILE *f, transform_t **ans)
100 transform_t *xform = NULL;
101 int cnt = 0;
102 char fmt[16];
103 char line[MAXLINE+2];
104 sprintf(fmt, "%%%d[^\r\n]", MAXLINE+1);
105 for (;;) {
106 int e;
107 line[0] = 0;
108 e = fscanf(f, fmt, line);
109 if (e < 0) break;
110 if (e == 1) {
111 char *str;
112 if (strlen(line) > MAXLINE)
113 die("%s file line exceeded %d characters", type, MAXLINE);
114 str = line + strspn(line, SPACETAB);
115 if (*str && *str != '#') {
116 size_t idlen = strcspn(str, "=");
117 char *from;
118 if (!str[idlen])
119 die("invalid %s line (no '='): %s", type, str);
120 if (!idlen)
121 die("invalid %s line (empty before '='): %s", type, str);
122 str[idlen] = 0;
123 from = str + idlen + 1;
124 from += strspn(from, SPACETAB);
125 if (!*from)
126 die("invalid %s line (empty after '='): %s", type, str);
127 trimback(str, SPACETAB);
128 trimback(from, SPACETAB);
129 if (debug > 1)
130 fprintf(stderr, "FROM: %s TO: %s\n", str, from);
131 xform = realloc(xform, sizeof(transform_t) * (cnt + 1));
132 if (!xform)
133 die("out of memory allocating %s array", type);
134 xform[cnt].from = (char *)malloc(idlen + 1 + strlen(from) + 1);
135 if (!xform[cnt].from)
136 die("out of memory allocating %s array", type);
137 memcpy(xform[cnt].from, str, idlen + 1);
138 xform[cnt].to = xform[cnt].from + idlen + 1;
139 strcpy(xform[cnt].to, from);
140 ++cnt;
143 e = fscanf(f, "%*[\r\n]");
144 if (e < 0) break;
146 if (feof(f)) {
147 qsort(xform, cnt, sizeof(transform_t), cmpxform);
148 *ans = xform;
149 return cnt;
151 if (xform)
152 free(xform);
153 return -1;
156 int main(int argc, char *argv[])
158 transform_t *authors = NULL;
159 int acount = 0;
160 transform_t *branches = NULL;
161 int bcount = 0;
162 FILE *inbinary = freopen(NULL, "rb", stdin);
163 FILE *outbinary = freopen(NULL, "wb", stdout);
164 int optind = 1;
166 if (argc >= 1)
167 setupme(argv[0]);
168 copybuff = (char *)malloc(COPYSIZE);
169 if (!copybuff)
170 die("out of memory allocating copy buffer");
171 if (!inbinary)
172 die("freopen(NULL, \"rb\", stdin) failed");
173 if (!outbinary)
174 die("freopen(NULL, \"wb\", stdout) failed");
175 for (; optind < argc; ++optind) {
176 #define A argv[optind]
177 if (strcmp(A, "--authors") == 0) {
178 if (++optind >= argc || !A || !*A)
179 die("--authors requires a filename argument");
180 authorsfile = A;
181 continue;
183 if (strcmp(A, "--branches") == 0) {
184 if (++optind >= argc || !A || !*A)
185 die("--branches requires a filename argument");
186 branchesfile = A;
187 continue;
189 if (strcmp(A, "--convert-tagger") == 0) {
190 if (convertid)
191 die("--convert-tagger may only be given once");
192 if (++optind >= argc || !A || !*A)
193 die("--convert-tagger requires an argument");
194 convertid = A;
195 continue;
197 if (!strcmp(A, "-V") || !strcmp(A, "--version")) {
198 opt_version = 1;
199 continue;
201 if (!strcmp(A, "-v") || !strcmp(A, "--verbose")) {
202 opt_verbose = 1;
203 continue;
205 if (!strcmp(A, "-h") || !strcmp(A, "--help")) {
206 opt_help = 1;
207 continue;
209 if (!strcmp(A, "-d") || !strcmp(A, "--debug")) {
210 ++debug;
211 continue;
213 if (strcmp(A, "--") == 0) {
214 ++optind;
215 break;
217 if (*A != '-' || !A[1])
218 break;
219 die("unknown option: %s", A);
220 #undef A
222 if (optind + 1 < argc)
223 die("no more than one non-option argument allowed (try -h)");
224 if (optind + 1 == argc) {
225 inbinary = freopen(argv[optind], "rb", inbinary);
227 if (opt_version)
228 printf("%s", gVersion);
229 if (opt_help) {
230 printf("%s", opt_verbose ? gHelp : gUsage);
231 exit(0);
234 if (authorsfile) {
235 FILE *af = fopen(authorsfile, "rb");
236 if (!af)
237 die("cannot open authors file: %s", authorsfile);
238 acount = read_transform_file("authors", af, &authors);
239 fclose(af);
240 if (acount < 0)
241 die("invalid authors file format: %s", authorsfile);
242 if (debug && acount) {
243 int i;
244 for (i=0; i<acount; ++i)
245 fprintf(stderr, "%s=%s\n", authors[i].from, authors[i].to);
248 if (branchesfile) {
249 FILE *bf = fopen(branchesfile, "rb");
250 if (!bf)
251 die("cannot open branches file: %s", branchesfile);
252 bcount = read_transform_file("branches", bf, &branches);
253 fclose(bf);
254 if (bcount < 0)
255 die("invalid branches file format: %s", branchesfile);
256 if (debug && bcount) {
257 int i;
258 for (i=0; i<bcount; ++i)
259 fprintf(stderr, "%s->%s\n", branches[i].from, branches[i].to);
263 processfile(inbinary, outbinary, authors, (size_t)acount, branches,
264 (size_t)bcount, convertid);
266 exit(0);
269 static char *nextline(FILE *in)
271 static char line[MAXLINE+2];
272 char fmt[16];
273 int e;
275 if (pushline) {
276 char *ans = pushline;
277 pushline = NULL;
278 if (*ans)
279 return ans;
281 sprintf(fmt, "%%%d[^\r\n]", MAXLINE+1);
282 line[0] = 0;
283 e = fscanf(in, fmt, line);
284 if (e < 0 && !feof(in))
285 die("error reading input");
286 if (e < 0)
287 return NULL;
288 if (strlen(line) > MAXLINE)
289 die("input line exceeded %d characters", MAXLINE);
290 e = fscanf(in, "%*[\r\n]");
291 if (e < 0 && !feof(in))
292 die("error reading input");
293 return line + strspn(line, SPACETAB);
/* Forward declarations of the per-command handlers that processfile
   dispatches to; each reads the rest of its command from `in` and writes the
   filtered form to `out`. */
296 static void processblob(FILE *in, FILE *out);
297 static void processcommit(FILE *in, FILE *out, const char *ref,
298 const transform_t *authors, size_t acount,
299 const transform_t *branches, size_t bcount);
300 static void processtag(FILE *in, FILE *out, const char *tag,
301 const transform_t *authors, size_t acount,
302 const transform_t *branches, size_t bcount,
303 const char *convertid);
304 static void processreset(FILE *in, FILE *out, const char *ref,
305 const transform_t *branches, size_t bcount);
307 static void processfile(FILE *in, FILE *out,
308 const transform_t *authors, size_t acount,
309 const transform_t *branches, size_t bcount,
310 const char *convertid)
312 const char *line;
313 for (;;) {
314 line = nextline(in);
315 if (!line) break;
316 if (!*line || *line == '#') continue;
317 if (strcmp(line, "blob") == 0)
318 processblob(in, out);
319 else if (strncmp(line, "commit ", 7) == 0)
320 processcommit(in, out, line+7, authors, acount, branches, bcount);
321 else if (strncmp(line, "tag ", 4) == 0)
322 processtag(in, out, line+4, authors, acount, branches, bcount, convertid);
323 else if (strncmp(line, "reset ", 6) == 0)
324 processreset(in, out, line+6, branches, bcount);
325 else if (!strcmp(line, "checkpoint") || !strcmp(line, "done"))
326 fprintf(out, "%s\n\n", line);
327 else if (!strncmp(line, "progress ", 9) ||
328 !strncmp(line, "cat-blob ", 9) || !strncmp(line, "ls ", 3) ||
329 !strncmp(line, "feature ", 8) || !strncmp(line, "option ", 7))
330 fprintf(out, "%s\n\n", line);
331 else
332 die("unrecognized input command: %s", line);
336 static const char *translate(const char *in, const transform_t *x, size_t c)
338 const transform_t *t;
339 transform_t search = {(char *)in, NULL};
340 t = (transform_t *)bsearch(&search, x, c, sizeof(transform_t), cmpxform);
341 return t ? t->to : in;
344 static const char *translatetag(const char *in, const transform_t *x, size_t c)
346 const transform_t *t;
347 transform_t search = {NULL, NULL};
348 char *temptag = malloc(10 + strlen(in) + 1);
349 if (!temptag)
350 die("out of memory");
351 sprintf(temptag, "refs/tags/%s", in);
352 search.from = temptag;
353 t = (transform_t *)bsearch(&search, x, c, sizeof(transform_t), cmpxform);
354 if (!t || strncmp(t->to, "refs/tags/", 10) != 0)
355 return in;
356 return t->to + 10;
/* Split "Name <email> when" into its three parts, in place.
 *
 * line:  text following the "author "/"committer "/"tagger " keyword; it is
 *        modified: the first '<' and the first '>' are overwritten with NUL.
 * name:  receives the start of `line` (text before '<', including any blank
 *        that precedes the bracket).
 * email: receives the text between the brackets.
 * when:  receives the text after '>', with one leading space skipped.
 *
 * Returns 1 on success.  Returns 0, leaving `line` and the outputs
 * untouched, when either bracket is missing or the first '>' comes before
 * the first '<'.
 */
static int splitauthor(char *line, char **name, char **email, char **when)
{
    char *lbracket = strchr(line, '<');
    char *rbracket = strchr(line, '>');
    char *rest;

    if (lbracket == NULL || rbracket == NULL)
        return 0;
    if (rbracket <= lbracket)
        return 0;

    *lbracket = '\0';
    *rbracket = '\0';
    rest = rbracket + 1;
    if (*rest == ' ')
        rest++;

    *name = line;
    *email = lbracket + 1;
    *when = rest;
    return 1;
}
375 static void copydatapart(FILE *in, FILE *out, const char *data, int nolf)
377 size_t l = strlen(data);
378 if (strspn(data, DIGITS) == l) {
379 size_t dlen;
380 if (!l)
381 die("Invalid data line: data %s", data);
382 dlen = (size_t)strtol(data, NULL, 10);
383 while (dlen) {
384 size_t cnt, amnt = COPYSIZE;
385 if (amnt > dlen)
386 amnt = dlen;
387 cnt = fread(copybuff, 1, amnt, in);
388 if (!cnt)
389 break;
390 if (out)
391 if (fwrite(copybuff, cnt, 1, out) != 1)
392 die("failed writing data to output");
393 dlen -= cnt;
395 if (dlen)
396 die("unexpected EOF reading data %s", data);
397 if (out && !nolf)
398 fprintf(out, "\n");
399 } else if (l < 3 || data[0] != '<' || data[1] != '<')
400 die("Invalid data line: data %s", data);
401 else {
402 fprintf(stderr, "%s: warning: data << not fully supported\n", me);
403 for (;;) {
404 const char *line = nextline(in);
405 if (!line)
406 die("unexpected EOF reading data %s", data);
407 if (out)
408 fprintf(out, "%s\n", line);
409 if (strcmp(line, data+2) == 0) {
410 if (out && !nolf)
411 fprintf(out, "\n");
412 return;
/* Read a "data" line and copy it together with its payload.
 *
 * in:   input stream; the next line must start with "data ".
 * out:  destination, or NULL to consume the data without writing anything.
 * nolf: passed on to copydatapart (non-zero: no newline after the payload).
 * err:  message used when the data line is missing; NULL selects
 *       "missing data line".
 *
 * The message is passed to die() as an argument, never as the format, so a
 * '%' in it cannot be interpreted as a conversion.
 */
static void copydata(FILE *in, FILE *out, int nolf, const char *err)
{
    const char *line = nextline(in);

    if (!err)
        err = "missing data line";
    if (!line || strncmp(line, "data ", 5) != 0)
        die("%s", err);
    if (out)
        fprintf(out, "%s\n", line);
    copydatapart(in, out, line+5, nolf);
}
/* Copy one blob command unchanged.
 *
 * in:  stream positioned after the "blob" line.
 * out: destination stream.
 *
 * Writes "blob", an optional "mark" line, the "data" line and its payload
 * (followed by a newline).  A missing header line or data line ends the
 * program through die().
 */
static void processblob(FILE *in, FILE *out)
{
    const char *hdr;

    if ((hdr = nextline(in)) == NULL)
        die("error reading blob header");
    fputs("blob\n", out);
    if (!strncmp(hdr, "mark ", 5)) {
        fprintf(out, "%s\n", hdr);
        if ((hdr = nextline(in)) == NULL)
            die("error reading blob header");
    }
    if (strncmp(hdr, "data ", 5))
        die("blob missing data line");
    fprintf(out, "%s\n", hdr);
    copydatapart(in, out, hdr + 5, 0);
}
448 static void processreset(FILE *in, FILE *out, const char *ref,
449 const transform_t *branches, size_t bcount)
451 char *line;
452 const char *newref = translate(ref, branches, bcount);
453 fprintf(out, "reset %s\n", newref);
454 line = nextline(in);
455 if (strncmp(line, "from ", 5) == 0)
456 fprintf(out, "%s\n\n", line);
457 else
458 pushline = line;
461 static void processtag(FILE *in, FILE *out, const char *tag,
462 const transform_t *authors, size_t acount,
463 const transform_t *branches, size_t bcount,
464 const char *convertid)
466 char *name = NULL, *email = NULL, *when = NULL;
467 char *line;
468 const char *newref = translatetag(tag, branches, bcount);
469 const char *newauth;
470 char tagline[MAXLINE+1];
471 char fromline[MAXLINE+1];
472 sprintf(tagline, "tag %s\n", newref);
473 line = nextline(in);
474 if (!line || strncmp(line, "from ", 5) != 0)
475 die("tag missing from line");
476 sprintf(fromline, "%s\n", line);
477 line = nextline(in);
478 if (!line || strncmp(line, "tagger ", 7) != 0)
479 die("tag missing tagger line");
480 if (!splitauthor(line+7, &name, &email, &when))
481 die("tag has bad tagger line");
482 if (convertid && strcmp(convertid, email) == 0) {
483 fprintf(out, "reset refs/tags/%s", tagline+4);
484 fprintf(out, "%s\n", fromline);
485 out = NULL;
486 } else {
487 fprintf(out, "%s%s", tagline, fromline);
488 newauth = translate(email, authors, acount);
489 if (newauth != email)
490 fprintf(out, "tagger %s %s\n", newauth, when);
491 else
492 fprintf(out, "tagger %s<%s> %s\n", name, email, when);
494 copydata(in, out, 0, "tag missing data line");
497 static void processcommit(FILE *in, FILE *out, const char *ref,
498 const transform_t *authors, size_t acount,
499 const transform_t *branches, size_t bcount)
501 char *name = NULL, *email = NULL, *when = NULL;
502 char *line;
503 const char *newref = translate(ref, branches, bcount);
504 const char *newauth;
505 fprintf(out, "commit %s\n", newref);
506 line = nextline(in);
507 if (!line)
508 die("error reading commit header");
509 if (strncmp(line, "mark ", 5) == 0) {
510 fprintf(out, "%s\n", line);
511 line = nextline(in);
512 if (!line)
513 die("error reading commit header");
515 if (strncmp(line, "author ", 7) == 0) {
516 if (!splitauthor(line+7, &name, &email, &when))
517 die("commit has bad author line");
518 newauth = translate(email, authors, acount);
519 if (newauth != email)
520 fprintf(out, "author %s %s\n", newauth, when);
521 else
522 fprintf(out, "author %s<%s> %s\n", name, email, when);
523 line = nextline(in);
524 if (!line)
525 die("error reading commit header");
527 if (strncmp(line, "committer ", 10) != 0)
528 die("commit missing committer line");
529 if (!splitauthor(line+10, &name, &email, &when))
530 die("commit has bad committer line");
531 newauth = translate(email, authors, acount);
532 if (newauth != email)
533 fprintf(out, "committer %s %s\n", newauth, when);
534 else
535 fprintf(out, "committer %s<%s> %s\n", name, email, when);
536 copydata(in, out, 1, "commit missing data line");
537 line = nextline(in);
538 if (!line)
539 die("error reading commit header");
540 if (strncmp(line, "from ", 5) == 0) {
541 fprintf(out, "%s\n", line);
542 line = nextline(in);
543 if (!line)
544 die("error reading commit header");
546 while (strncmp(line, "merge ", 6) == 0) {
547 fprintf(out, "%s\n", line);
548 line = nextline(in);
549 if (!line)
550 die("error reading commit header");
552 while (!strcmp(line, "deleteall") || !strncmp(line, "M ", 2) ||
553 !strncmp(line, "D ", 2) || !strncmp(line, "C ", 2) ||
554 !strncmp(line, "R ", 2) || !strncmp(line, "N ", 2)) {
555 fprintf(out, "%s\n", line);
556 line = nextline(in);
557 if (!line)
558 die("error reading commit header");
560 pushline = line;
561 fprintf(out, "\n");