5 * convert.c - convert a file when checking it out and checking it in.
7 * This should use the pathname to decide on whether it wants to do some
8 * more interesting conversions (automatic gzip/unzip, general format
9 * conversions etc etc), but by default it just does automatic CRLF<->LF
10 * translation when the "auto_crlf" option is set.
/*
 * Member declarations of the text statistics record that the functions in
 * this file access through `struct text_stat`.
 * NOTE(review): the enclosing struct header and closing brace are not
 * visible in this extract.
 *
 *   cr, lf, crlf            - unsigned occurrence counters
 *   printable, nonprintable - unsigned byte-class counters; the existing
 *                             comment marks them as approximations
 */
14 /* CR, LF and CRLF counts */
15 unsigned cr
, lf
, crlf
;
17 /* These are just approximations! */
18 unsigned printable
, nonprintable
;
/*
 * gather_stats - walk the `size` bytes of `buf` and fill in `*stats`.
 *
 * The record is zeroed with memset before the scan, so the result does not
 * depend on its previous contents. Each byte is read as an unsigned char.
 * The visible lines show a one-byte lookahead for a line feed that is
 * bounds-checked against `size`, a case label that singles out BS, HT, ESC
 * and FF, and two places where `nonprintable` is incremented.
 *
 * NOTE(review): the branch heads and the remaining counter updates are
 * missing from this extract, so which counter each path bumps must be
 * confirmed against the complete file.
 */
21 static void gather_stats(const char *buf
, unsigned long size
, struct text_stat
*stats
)
25 memset(stats
, 0, sizeof(*stats
));
27 for (i
= 0; i
< size
; i
++) {
28 unsigned char c
= buf
[i
];
31 if (i
+1 < size
&& buf
[i
+1] == '\n')
41 stats
->nonprintable
++;
44 /* BS, HT, ESC and FF */
45 case '\b': case '\t': case '\033': case '\014':
49 stats
->nonprintable
++;
/*
 * is_binary - binary-versus-text heuristic over previously gathered stats.
 *
 * The only visible test compares `printable >> 7` (printable divided by 128,
 * rounded down) with `nonprintable` and takes a branch when the former is
 * smaller.
 *
 * NOTE(review): the return statements are missing from this extract;
 * presumably that branch reports the buffer as binary - confirm. No visible
 * line reads `size`.
 */
58 * The same heuristics as diff.c::mmfile_is_binary()
60 static int is_binary(unsigned long size
, struct text_stat
*stats
)
63 if ((stats
->printable
>> 7) < stats
->nonprintable
)
66 * Other heuristics? Average line length might be relevant,
67 * as might LF vs CR vs CRLF counts..
69 * NOTE! It might be normal to have a low ratio of CRLF to LF
70 * (somebody starts with a LF-only file and edits it with an editor
71 * that adds CRLF only to lines that are added..). But do we
72 * want to support CR-only? Probably not.
/*
 * autocrlf_to_git - conversion step that shrinks a buffer by one byte per
 * CRLF pair.
 *
 * Visible steps, in order:
 *   - gather_stats() is run over `buffer` / `size`;
 *   - a branch is taken when `cr != crlf`, which per the existing comment
 *     means bare CR characters are present and no conversion is attempted;
 *   - a branch is taken when is_binary() reports true;
 *   - the output length is computed as `size - crlf` and a buffer of that
 *     length is obtained from xmalloc();
 *   - input bytes are consumed one at a time through `*buffer++`.
 *
 * Per the existing comment, the function returns true when it has switched
 * buffers on the caller.
 *
 * NOTE(review): the lines that load `buffer` / `size` from `bufp` / `sizep`,
 * the early returns, the copy loop body and the final stores are missing
 * from this extract; `path` is not read by any visible line.
 */
77 static int autocrlf_to_git(const char *path
, char **bufp
, unsigned long *sizep
)
80 unsigned long size
, nsize
;
81 struct text_stat stats
;
91 gather_stats(buffer
, size
, &stats
);
93 /* No CR? Nothing to convert, regardless. */
98 * We're currently not going to even try to convert stuff
99 * that has bare CR characters. Does anybody do that crazy
102 if (stats
.cr
!= stats
.crlf
)
106 * And add some heuristics for binary vs text, of course...
108 if (is_binary(size
, &stats
))
112 * Ok, allocate a new buffer, fill it in, and return true
113 * to let the caller know that we switched buffers on it.
115 nsize
= size
- stats
.crlf
;
116 nbuf
= xmalloc(nsize
);
120 unsigned char c
= *buffer
++;
/*
 * autocrlf_to_working_tree - conversion step that grows a buffer by one byte
 * for every LF that is not already part of a CRLF pair.
 *
 * Visible steps, in order:
 *   - gather_stats() is run over `buffer` / `size`;
 *   - a branch is taken when `lf == crlf` (every LF already follows a CR);
 *   - a branch is taken when `cr != crlf` (bare CR present, left untouched
 *     per the existing comment);
 *   - a branch is taken when is_binary() reports true;
 *   - the output length is computed as `size + lf - crlf` and a buffer of
 *     that length is obtained from xmalloc();
 *   - while copying, an LF whose preceding byte `last` is not CR is
 *     detected by the visible condition.
 *
 * Per the existing comment, the function returns true when it has switched
 * buffers on the caller.
 *
 * NOTE(review): the lines that load `buffer` / `size` from `bufp` / `sizep`,
 * the early returns, the statements executed for a detected bare LF and the
 * final stores are missing from this extract; `path` is not read by any
 * visible line.
 */
128 static int autocrlf_to_working_tree(const char *path
, char **bufp
, unsigned long *sizep
)
131 unsigned long size
, nsize
;
132 struct text_stat stats
;
136 * FIXME! Other pluggable conversions should go here,
137 * based on filename patterns. Right now we just do the
138 * stupid auto-CRLF one.
148 gather_stats(buffer
, size
, &stats
);
150 /* No LF? Nothing to convert, regardless. */
154 /* Was it already in CRLF format? */
155 if (stats
.lf
== stats
.crlf
)
158 /* If we have any bare CR characters, we're not going to touch it */
159 if (stats
.cr
!= stats
.crlf
)
162 if (is_binary(size
, &stats
))
166 * Ok, allocate a new buffer, fill it in, and return true
167 * to let the caller know that we switched buffers on it.
169 nsize
= size
+ stats
.lf
- stats
.crlf
;
170 nbuf
= xmalloc(nsize
);
175 unsigned char c
= *buffer
++;
176 if (c
== '\n' && last
!= '\r')
/*
 * setup_crlf_check - prepare `check` for a lookup of the crlf attribute.
 *
 * The attribute handle returned by git_attr() is kept in a function-local
 * static pointer and stored into `check->attr`. The second argument, 4,
 * equals the length of the attribute name string.
 *
 * NOTE(review): line 190 is presumably guarded so that git_attr() is only
 * called while the static is still unset; the guard is not visible in this
 * extract.
 */
185 static void setup_crlf_check(struct git_attr_check
*check
)
187 static struct git_attr
*attr_crlf
;
190 attr_crlf
= git_attr("crlf", 4);
191 check
->attr
= attr_crlf
;
/*
 * git_path_is_binary - report whether automatic CRLF conversion is disabled
 * for `path` through the crlf attribute.
 *
 * A local git_attr_check is initialised by setup_crlf_check() and passed,
 * as a one-element array, to git_checkattr(). The result is nonzero only
 * when git_checkattr() returns 0 and the `isset` field of the check
 * equals 0.
 *
 * NOTE(review): the meaning of the `isset` values is defined by the attr
 * API and is not visible here; per the existing (truncated) comment, the
 * value 0 corresponds to the attribute being explicitly turned off -
 * confirm.
 */
194 static int git_path_is_binary(const char *path
)
196 struct git_attr_check attr_crlf_check
;
198 setup_crlf_check(&attr_crlf_check
);
201 * If crlf is not mentioned, default to autocrlf;
202 * disable autocrlf only when crlf attribute is explicitly
205 return (!git_checkattr(path
, 1, &attr_crlf_check
) &&
206 (0 == attr_crlf_check
.isset
));
/*
 * convert_to_git - non-static entry point that runs the autocrlf_to_git()
 * conversion on a path.
 *
 * `path` is first tested with git_path_is_binary(). When that test is
 * false, the arguments are forwarded unchanged to autocrlf_to_git() and its
 * result is returned.
 *
 * NOTE(review): the statement executed when the test is true is missing
 * from this extract; presumably it returns without converting - confirm.
 */
209 int convert_to_git(const char *path
, char **bufp
, unsigned long *sizep
)
211 if (git_path_is_binary(path
))
213 return autocrlf_to_git(path
, bufp
, sizep
);
/*
 * convert_to_working_tree - non-static entry point that runs the
 * autocrlf_to_working_tree() conversion on a path.
 *
 * `path` is first tested with git_path_is_binary(). When that test is
 * false, the arguments are forwarded unchanged to
 * autocrlf_to_working_tree() and its result is returned.
 *
 * NOTE(review): the statement executed when the test is true is missing
 * from this extract; presumably it returns without converting - confirm.
 */
216 int convert_to_working_tree(const char *path
, char **bufp
, unsigned long *sizep
)
218 if (git_path_is_binary(path
))
220 return autocrlf_to_working_tree(path
, bufp
, sizep
);