Define 'crlf' attribute.
[git.git] / convert.c
blob20c744aa23652b8b93ea96137a668592c6e28c2d
1 #include "cache.h"
2 #include "attr.h"
/*
 * convert.c - convert a file when checking it out and checking it in.
 *
 * This should use the pathname to decide whether it wants to do some
 * more interesting conversions (automatic gzip/unzip, general format
 * conversions etc etc), but by default it just does automatic CRLF<->LF
 * translation when the "auto_crlf" option is set.
 */
/*
 * Per-buffer byte tallies, filled in by gather_stats() and consulted by
 * the CRLF conversion heuristics in this file.
 */
struct text_stat {
	/*
	 * CR, LF and CRLF counts.  "cr" and "lf" count every '\r' and
	 * '\n' byte, including the ones that make up a CRLF pair, so
	 * cr == crlf means "no bare CR" and lf == crlf means "no bare LF".
	 */
	unsigned cr, lf, crlf;

	/* These are just approximations! */
	unsigned printable, nonprintable;
};
21 static void gather_stats(const char *buf, unsigned long size, struct text_stat *stats)
23 unsigned long i;
25 memset(stats, 0, sizeof(*stats));
27 for (i = 0; i < size; i++) {
28 unsigned char c = buf[i];
29 if (c == '\r') {
30 stats->cr++;
31 if (i+1 < size && buf[i+1] == '\n')
32 stats->crlf++;
33 continue;
35 if (c == '\n') {
36 stats->lf++;
37 continue;
39 if (c == 127)
40 /* DEL */
41 stats->nonprintable++;
42 else if (c < 32) {
43 switch (c) {
44 /* BS, HT, ESC and FF */
45 case '\b': case '\t': case '\033': case '\014':
46 stats->printable++;
47 break;
48 default:
49 stats->nonprintable++;
52 else
53 stats->printable++;
58 * The same heuristics as diff.c::mmfile_is_binary()
60 static int is_binary(unsigned long size, struct text_stat *stats)
63 if ((stats->printable >> 7) < stats->nonprintable)
64 return 1;
66 * Other heuristics? Average line length might be relevant,
67 * as might LF vs CR vs CRLF counts..
69 * NOTE! It might be normal to have a low ratio of CRLF to LF
70 * (somebody starts with a LF-only file and edits it with an editor
71 * that adds CRLF only to lines that are added..). But do we
72 * want to support CR-only? Probably not.
74 return 0;
77 static int autocrlf_to_git(const char *path, char **bufp, unsigned long *sizep)
79 char *buffer, *nbuf;
80 unsigned long size, nsize;
81 struct text_stat stats;
83 if (!auto_crlf)
84 return 0;
86 size = *sizep;
87 if (!size)
88 return 0;
89 buffer = *bufp;
91 gather_stats(buffer, size, &stats);
93 /* No CR? Nothing to convert, regardless. */
94 if (!stats.cr)
95 return 0;
98 * We're currently not going to even try to convert stuff
99 * that has bare CR characters. Does anybody do that crazy
100 * stuff?
102 if (stats.cr != stats.crlf)
103 return 0;
106 * And add some heuristics for binary vs text, of course...
108 if (is_binary(size, &stats))
109 return 0;
112 * Ok, allocate a new buffer, fill it in, and return true
113 * to let the caller know that we switched buffers on it.
115 nsize = size - stats.crlf;
116 nbuf = xmalloc(nsize);
117 *bufp = nbuf;
118 *sizep = nsize;
119 do {
120 unsigned char c = *buffer++;
121 if (c != '\r')
122 *nbuf++ = c;
123 } while (--size);
125 return 1;
128 static int autocrlf_to_working_tree(const char *path, char **bufp, unsigned long *sizep)
130 char *buffer, *nbuf;
131 unsigned long size, nsize;
132 struct text_stat stats;
133 unsigned char last;
136 * FIXME! Other pluggable conversions should go here,
137 * based on filename patterns. Right now we just do the
138 * stupid auto-CRLF one.
140 if (auto_crlf <= 0)
141 return 0;
143 size = *sizep;
144 if (!size)
145 return 0;
146 buffer = *bufp;
148 gather_stats(buffer, size, &stats);
150 /* No LF? Nothing to convert, regardless. */
151 if (!stats.lf)
152 return 0;
154 /* Was it already in CRLF format? */
155 if (stats.lf == stats.crlf)
156 return 0;
158 /* If we have any bare CR characters, we're not going to touch it */
159 if (stats.cr != stats.crlf)
160 return 0;
162 if (is_binary(size, &stats))
163 return 0;
166 * Ok, allocate a new buffer, fill it in, and return true
167 * to let the caller know that we switched buffers on it.
169 nsize = size + stats.lf - stats.crlf;
170 nbuf = xmalloc(nsize);
171 *bufp = nbuf;
172 *sizep = nsize;
173 last = 0;
174 do {
175 unsigned char c = *buffer++;
176 if (c == '\n' && last != '\r')
177 *nbuf++ = '\r';
178 *nbuf++ = c;
179 last = c;
180 } while (--size);
182 return 1;
185 static void setup_crlf_check(struct git_attr_check *check)
187 static struct git_attr *attr_crlf;
189 if (!attr_crlf)
190 attr_crlf = git_attr("crlf", 4);
191 check->attr = attr_crlf;
194 static int git_path_is_binary(const char *path)
196 struct git_attr_check attr_crlf_check;
198 setup_crlf_check(&attr_crlf_check);
201 * If crlf is not mentioned, default to autocrlf;
202 * disable autocrlf only when crlf attribute is explicitly
203 * unset.
205 return (!git_checkattr(path, 1, &attr_crlf_check) &&
206 (0 == attr_crlf_check.isset));
/*
 * Convert file content on its way into git.
 *
 * Returns 0 without touching the buffer when git_path_is_binary() says
 * conversion is disabled for "path".  Otherwise returns the result of
 * autocrlf_to_git(): 1 if *bufp and *sizep now describe a newly
 * allocated, converted buffer, 0 if they were left alone.
 */
int convert_to_git(const char *path, char **bufp, unsigned long *sizep)
{
	if (git_path_is_binary(path))
		return 0;
	return autocrlf_to_git(path, bufp, sizep);
}
/*
 * Convert file content on its way out to the working tree.
 *
 * Returns 0 without touching the buffer when git_path_is_binary() says
 * conversion is disabled for "path".  Otherwise returns the result of
 * autocrlf_to_working_tree(): 1 if *bufp and *sizep now describe a
 * newly allocated, converted buffer, 0 if they were left alone.
 */
int convert_to_working_tree(const char *path, char **bufp, unsigned long *sizep)
{
	if (git_path_is_binary(path))
		return 0;
	return autocrlf_to_working_tree(path, bufp, sizep);
}