Rewrite convert_to_{git,working_tree} to use strbuf's.
[git/dscho.git] / convert.c
blob00a341c595ebb6ac6965001cce84d3d4e65f60c3
1 #include "cache.h"
2 #include "attr.h"
3 #include "run-command.h"
4 #include "strbuf.h"
/*
 * convert.c - convert a file when checking it out and checking it in.
 *
 * This should use the pathname to decide on whether it wants to do some
 * more interesting conversions (automatic gzip/unzip, general format
 * conversions etc etc), but by default it just does automatic CRLF<->LF
 * translation when the "auto_crlf" option is set.
 */
/*
 * Conversion modes handed to the CRLF helpers in this file as their
 * "action" argument.
 */
#define CRLF_GUESS	(-1)	/* decide from the content itself */
#define CRLF_BINARY	0	/* never convert */
#define CRLF_TEXT	1	/* convert without the binary/bare-CR checks */
#define CRLF_INPUT	2	/* convert towards git only, not on checkout */
/* Byte-class tallies collected over a buffer by gather_stats(). */
struct text_stat {
	/* CR, LF and CRLF counts */
	unsigned cr;
	unsigned lf;
	unsigned crlf;

	/* These are just approximations! */
	unsigned printable;
	unsigned nonprintable;
};
28 static void gather_stats(const char *buf, unsigned long size, struct text_stat *stats)
30 unsigned long i;
32 memset(stats, 0, sizeof(*stats));
34 for (i = 0; i < size; i++) {
35 unsigned char c = buf[i];
36 if (c == '\r') {
37 stats->cr++;
38 if (i+1 < size && buf[i+1] == '\n')
39 stats->crlf++;
40 continue;
42 if (c == '\n') {
43 stats->lf++;
44 continue;
46 if (c == 127)
47 /* DEL */
48 stats->nonprintable++;
49 else if (c < 32) {
50 switch (c) {
51 /* BS, HT, ESC and FF */
52 case '\b': case '\t': case '\033': case '\014':
53 stats->printable++;
54 break;
55 default:
56 stats->nonprintable++;
59 else
60 stats->printable++;
65 * The same heuristics as diff.c::mmfile_is_binary()
67 static int is_binary(unsigned long size, struct text_stat *stats)
70 if ((stats->printable >> 7) < stats->nonprintable)
71 return 1;
73 * Other heuristics? Average line length might be relevant,
74 * as might LF vs CR vs CRLF counts..
76 * NOTE! It might be normal to have a low ratio of CRLF to LF
77 * (somebody starts with a LF-only file and edits it with an editor
78 * that adds CRLF only to lines that are added..). But do we
79 * want to support CR-only? Probably not.
81 return 0;
84 static int crlf_to_git(const char *path, const char *src, size_t len,
85 struct strbuf *buf, int action)
87 struct text_stat stats;
88 char *dst;
90 if ((action == CRLF_BINARY) || !auto_crlf || !len)
91 return 0;
93 gather_stats(src, len, &stats);
94 /* No CR? Nothing to convert, regardless. */
95 if (!stats.cr)
96 return 0;
98 if (action == CRLF_GUESS) {
100 * We're currently not going to even try to convert stuff
101 * that has bare CR characters. Does anybody do that crazy
102 * stuff?
104 if (stats.cr != stats.crlf)
105 return 0;
108 * And add some heuristics for binary vs text, of course...
110 if (is_binary(len, &stats))
111 return 0;
114 strbuf_grow(buf, len);
115 dst = buf->buf;
116 if (action == CRLF_GUESS) {
118 * If we guessed, we already know we rejected a file with
119 * lone CR, and we can strip a CR without looking at what
120 * follow it.
122 do {
123 unsigned char c = *src++;
124 if (c != '\r')
125 *dst++ = c;
126 } while (--len);
127 } else {
128 do {
129 unsigned char c = *src++;
130 if (! (c == '\r' && (1 < len && *src == '\n')))
131 *dst++ = c;
132 } while (--len);
134 strbuf_setlen(buf, dst - buf->buf);
135 return 1;
138 static int crlf_to_worktree(const char *path, const char *src, size_t len,
139 struct strbuf *buf, int action)
141 char *to_free = NULL;
142 struct text_stat stats;
144 if ((action == CRLF_BINARY) || (action == CRLF_INPUT) ||
145 auto_crlf <= 0)
146 return 0;
148 if (!len)
149 return 0;
151 gather_stats(src, len, &stats);
153 /* No LF? Nothing to convert, regardless. */
154 if (!stats.lf)
155 return 0;
157 /* Was it already in CRLF format? */
158 if (stats.lf == stats.crlf)
159 return 0;
161 if (action == CRLF_GUESS) {
162 /* If we have any bare CR characters, we're not going to touch it */
163 if (stats.cr != stats.crlf)
164 return 0;
166 if (is_binary(len, &stats))
167 return 0;
170 /* are we "faking" in place editing ? */
171 if (src == buf->buf)
172 to_free = strbuf_detach(buf);
174 strbuf_grow(buf, len + stats.lf - stats.crlf);
175 for (;;) {
176 const char *nl = memchr(src, '\n', len);
177 if (!nl)
178 break;
179 if (nl > src && nl[-1] == '\r') {
180 strbuf_add(buf, src, nl + 1 - src);
181 } else {
182 strbuf_add(buf, src, nl - src);
183 strbuf_addstr(buf, "\r\n");
185 len -= nl + 1 - src;
186 src = nl + 1;
188 strbuf_add(buf, src, len);
190 free(to_free);
191 return 1;
194 static int filter_buffer(const char *path, const char *src,
195 unsigned long size, const char *cmd)
198 * Spawn cmd and feed the buffer contents through its stdin.
200 struct child_process child_process;
201 int pipe_feed[2];
202 int write_err, status;
204 memset(&child_process, 0, sizeof(child_process));
206 if (pipe(pipe_feed) < 0) {
207 error("cannot create pipe to run external filter %s", cmd);
208 return 1;
211 child_process.pid = fork();
212 if (child_process.pid < 0) {
213 error("cannot fork to run external filter %s", cmd);
214 close(pipe_feed[0]);
215 close(pipe_feed[1]);
216 return 1;
218 if (!child_process.pid) {
219 dup2(pipe_feed[0], 0);
220 close(pipe_feed[0]);
221 close(pipe_feed[1]);
222 execlp("sh", "sh", "-c", cmd, NULL);
223 return 1;
225 close(pipe_feed[0]);
227 write_err = (write_in_full(pipe_feed[1], src, size) < 0);
228 if (close(pipe_feed[1]))
229 write_err = 1;
230 if (write_err)
231 error("cannot feed the input to external filter %s", cmd);
233 status = finish_command(&child_process);
234 if (status)
235 error("external filter %s failed %d", cmd, -status);
236 return (write_err || status);
239 static int apply_filter(const char *path, const char *src, size_t len,
240 struct strbuf *dst, const char *cmd)
243 * Create a pipeline to have the command filter the buffer's
244 * contents.
246 * (child --> cmd) --> us
248 int pipe_feed[2];
249 int status, ret = 1;
250 struct child_process child_process;
251 struct strbuf nbuf;
253 if (!cmd)
254 return 0;
256 memset(&child_process, 0, sizeof(child_process));
258 if (pipe(pipe_feed) < 0) {
259 error("cannot create pipe to run external filter %s", cmd);
260 return 0;
263 fflush(NULL);
264 child_process.pid = fork();
265 if (child_process.pid < 0) {
266 error("cannot fork to run external filter %s", cmd);
267 close(pipe_feed[0]);
268 close(pipe_feed[1]);
269 return 0;
271 if (!child_process.pid) {
272 dup2(pipe_feed[1], 1);
273 close(pipe_feed[0]);
274 close(pipe_feed[1]);
275 exit(filter_buffer(path, src, len, cmd));
277 close(pipe_feed[1]);
279 strbuf_init(&nbuf, 0);
280 if (strbuf_read(&nbuf, pipe_feed[0], len) < 0) {
281 error("read from external filter %s failed", cmd);
282 ret = 0;
284 if (close(pipe_feed[0])) {
285 ret = error("read from external filter %s failed", cmd);
286 ret = 0;
288 status = finish_command(&child_process);
289 if (status) {
290 ret = error("external filter %s failed %d", cmd, -status);
291 ret = 0;
294 if (ret) {
295 *dst = nbuf;
296 } else {
297 strbuf_release(&nbuf);
299 return ret;
/*
 * One user-configured conversion driver. Drivers form a singly linked
 * list in the order they were first seen in the configuration.
 */
struct convert_driver {
	const char *name;		/* the <name> in "filter.<name>.*" */
	struct convert_driver *next;
	char *smudge;			/* command from filter.<name>.smudge, or NULL */
	char *clean;			/* command from filter.<name>.clean, or NULL */
};

/* Head of the driver list. */
static struct convert_driver *user_convert;
/* Address of the link that the next new driver is stored through. */
static struct convert_driver **user_convert_tail;
309 static int read_convert_config(const char *var, const char *value)
311 const char *ep, *name;
312 int namelen;
313 struct convert_driver *drv;
316 * External conversion drivers are configured using
317 * "filter.<name>.variable".
319 if (prefixcmp(var, "filter.") || (ep = strrchr(var, '.')) == var + 6)
320 return 0;
321 name = var + 7;
322 namelen = ep - name;
323 for (drv = user_convert; drv; drv = drv->next)
324 if (!strncmp(drv->name, name, namelen) && !drv->name[namelen])
325 break;
326 if (!drv) {
327 char *namebuf;
328 drv = xcalloc(1, sizeof(struct convert_driver));
329 namebuf = xmalloc(namelen + 1);
330 memcpy(namebuf, name, namelen);
331 namebuf[namelen] = 0;
332 drv->name = namebuf;
333 drv->next = NULL;
334 *user_convert_tail = drv;
335 user_convert_tail = &(drv->next);
338 ep++;
341 * filter.<name>.smudge and filter.<name>.clean specifies
342 * the command line:
344 * command-line
346 * The command-line will not be interpolated in any way.
349 if (!strcmp("smudge", ep)) {
350 if (!value)
351 return error("%s: lacks value", var);
352 drv->smudge = strdup(value);
353 return 0;
356 if (!strcmp("clean", ep)) {
357 if (!value)
358 return error("%s: lacks value", var);
359 drv->clean = strdup(value);
360 return 0;
362 return 0;
365 static void setup_convert_check(struct git_attr_check *check)
367 static struct git_attr *attr_crlf;
368 static struct git_attr *attr_ident;
369 static struct git_attr *attr_filter;
371 if (!attr_crlf) {
372 attr_crlf = git_attr("crlf", 4);
373 attr_ident = git_attr("ident", 5);
374 attr_filter = git_attr("filter", 6);
375 user_convert_tail = &user_convert;
376 git_config(read_convert_config);
378 check[0].attr = attr_crlf;
379 check[1].attr = attr_ident;
380 check[2].attr = attr_filter;
/*
 * Count the ident keywords in cp[0..size):
 *
 *	"$Id: 0000000000000000000000000000000000000000 $" <=> "$Id$"
 *
 * Both the collapsed "$Id$" form and the expanded "$Id:...$" form are
 * counted. An expanded form with no closing '$' is not counted.
 */
static int count_ident(const char *cp, unsigned long size)
{
	const char *end = cp + size;
	int found = 0;

	while (cp < end) {
		char marker;

		if (*cp++ != '$')
			continue;
		/* need at least "Id" and one more byte after the '$' */
		if (end - cp < 3)
			break;
		if (cp[0] != 'I' || cp[1] != 'd')
			continue;

		marker = cp[2];
		cp += 3;
		if (marker == '$') {
			found++;	/* $Id$ */
			continue;
		}
		if (marker != ':')
			continue;

		/*
		 * "$Id: ... "; scan up to the closing dollar sign and discard.
		 */
		while (cp < end) {
			if (*cp++ == '$') {
				found++;
				break;
			}
		}
	}
	return found;
}
423 static int ident_to_git(const char *path, const char *src, size_t len,
424 struct strbuf *buf, int ident)
426 char *dst, *dollar;
428 if (!ident || !count_ident(src, len))
429 return 0;
431 strbuf_grow(buf, len);
432 dst = buf->buf;
433 for (;;) {
434 dollar = memchr(src, '$', len);
435 if (!dollar)
436 break;
437 memcpy(dst, src, dollar + 1 - src);
438 dst += dollar + 1 - src;
439 len -= dollar + 1 - src;
440 src = dollar + 1;
442 if (len > 3 && !memcmp(src, "Id:", 3)) {
443 dollar = memchr(src + 3, '$', len - 3);
444 if (!dollar)
445 break;
446 memcpy(dst, "Id$", 3);
447 dst += 3;
448 len -= dollar + 1 - src;
449 src = dollar + 1;
452 memcpy(dst, src, len);
453 strbuf_setlen(buf, dst + len - buf->buf);
454 return 1;
457 static int ident_to_worktree(const char *path, const char *src, size_t len,
458 struct strbuf *buf, int ident)
460 unsigned char sha1[20];
461 char *to_free = NULL, *dollar;
462 int cnt;
464 if (!ident)
465 return 0;
467 cnt = count_ident(src, len);
468 if (!cnt)
469 return 0;
471 /* are we "faking" in place editing ? */
472 if (src == buf->buf)
473 to_free = strbuf_detach(buf);
474 hash_sha1_file(src, len, "blob", sha1);
476 strbuf_grow(buf, len + cnt * 43);
477 for (;;) {
478 /* step 1: run to the next '$' */
479 dollar = memchr(src, '$', len);
480 if (!dollar)
481 break;
482 strbuf_add(buf, src, dollar + 1 - src);
483 len -= dollar + 1 - src;
484 src = dollar + 1;
486 /* step 2: does it looks like a bit like Id:xxx$ or Id$ ? */
487 if (len < 3 || memcmp("Id", src, 2))
488 continue;
490 /* step 3: skip over Id$ or Id:xxxxx$ */
491 if (src[2] == '$') {
492 src += 3;
493 len -= 3;
494 } else if (src[2] == ':') {
496 * It's possible that an expanded Id has crept its way into the
497 * repository, we cope with that by stripping the expansion out
499 dollar = memchr(src + 3, '$', len - 3);
500 if (!dollar) {
501 /* incomplete keyword, no more '$', so just quit the loop */
502 break;
505 len -= dollar + 1 - src;
506 src = dollar + 1;
507 } else {
508 /* it wasn't a "Id$" or "Id:xxxx$" */
509 continue;
512 /* step 4: substitute */
513 strbuf_addstr(buf, "Id: ");
514 strbuf_add(buf, sha1_to_hex(sha1), 40);
515 strbuf_addstr(buf, " $");
517 strbuf_add(buf, src, len);
519 free(to_free);
520 return 1;
523 static int git_path_check_crlf(const char *path, struct git_attr_check *check)
525 const char *value = check->value;
527 if (ATTR_TRUE(value))
528 return CRLF_TEXT;
529 else if (ATTR_FALSE(value))
530 return CRLF_BINARY;
531 else if (ATTR_UNSET(value))
533 else if (!strcmp(value, "input"))
534 return CRLF_INPUT;
535 return CRLF_GUESS;
538 static struct convert_driver *git_path_check_convert(const char *path,
539 struct git_attr_check *check)
541 const char *value = check->value;
542 struct convert_driver *drv;
544 if (ATTR_TRUE(value) || ATTR_FALSE(value) || ATTR_UNSET(value))
545 return NULL;
546 for (drv = user_convert; drv; drv = drv->next)
547 if (!strcmp(value, drv->name))
548 return drv;
549 return NULL;
552 static int git_path_check_ident(const char *path, struct git_attr_check *check)
554 const char *value = check->value;
556 return !!ATTR_TRUE(value);
559 int convert_to_git(const char *path, const char *src, size_t len, struct strbuf *dst)
561 struct git_attr_check check[3];
562 int crlf = CRLF_GUESS;
563 int ident = 0, ret = 0;
564 char *filter = NULL;
566 setup_convert_check(check);
567 if (!git_checkattr(path, ARRAY_SIZE(check), check)) {
568 struct convert_driver *drv;
569 crlf = git_path_check_crlf(path, check + 0);
570 ident = git_path_check_ident(path, check + 1);
571 drv = git_path_check_convert(path, check + 2);
572 if (drv && drv->clean)
573 filter = drv->clean;
576 ret |= apply_filter(path, src, len, dst, filter);
577 if (ret) {
578 src = dst->buf;
579 len = dst->len;
581 ret |= crlf_to_git(path, src, len, dst, crlf);
582 if (ret) {
583 src = dst->buf;
584 len = dst->len;
586 return ret | ident_to_git(path, src, len, dst, ident);
589 int convert_to_working_tree(const char *path, const char *src, size_t len, struct strbuf *dst)
591 struct git_attr_check check[3];
592 int crlf = CRLF_GUESS;
593 int ident = 0, ret = 0;
594 char *filter = NULL;
596 setup_convert_check(check);
597 if (!git_checkattr(path, ARRAY_SIZE(check), check)) {
598 struct convert_driver *drv;
599 crlf = git_path_check_crlf(path, check + 0);
600 ident = git_path_check_ident(path, check + 1);
601 drv = git_path_check_convert(path, check + 2);
602 if (drv && drv->smudge)
603 filter = drv->smudge;
606 ret |= ident_to_worktree(path, src, len, dst, ident);
607 if (ret) {
608 src = dst->buf;
609 len = dst->len;
611 ret |= crlf_to_worktree(path, src, len, dst, crlf);
612 if (ret) {
613 src = dst->buf;
614 len = dst->len;
616 return ret | apply_filter(path, src, len, dst, filter);