/*
 * convert.c - convert a file when checking it out and checking it in.
 *
 * This should use the pathname to decide whether it wants to do some
 * more interesting conversions (automatic gzip/unzip, general format
 * conversions etc etc), but by default it just does automatic CRLF<->LF
 * translation when the "auto_crlf" option is set.
 */
/*
 * NOTE(review): these two declarations appear to be the members of
 * struct text_stat, the type that gather_stats() fills in and that
 * is_binary() and the convert_* functions read.  The enclosing
 * "struct text_stat {" and closing "};" lines are not present in this
 * listing -- confirm against the complete file.
 *
 * cr, lf, crlf:
 *	tallies for one scanned buffer.  Callers compare cr with crlf
 *	to detect bare CR characters, and lf with crlf to detect
 *	content that is already in CRLF form.
 * printable, nonprintable:
 *	rough character-class tallies; is_binary() weighs one against
 *	the other.
 */
12 /* CR, LF and CRLF counts */
13 unsigned cr
, lf
, crlf
;
15 /* These are just approximations! */
16 unsigned printable
, nonprintable
;
/*
 * gather_stats - scan a buffer and record what was seen in *stats.
 *
 * buf:   bytes to scan; only read.
 * size:  number of bytes of buf to examine.
 * stats: output.  It is zeroed with memset() before the scan, so any
 *        previous contents are discarded.
 *
 * Returns nothing; the result is delivered through *stats.
 *
 * NOTE(review): several statements of this function are missing from
 * this listing (the embedded numbering jumps 29 -> 39 -> 42 -> 47).
 * The conditions guarding the two nonprintable increments, the
 * statement under the case labels, and the updates of the other
 * counters are not visible here -- confirm the exact classification
 * rules against the complete file.
 */
19 static void gather_stats(const char *buf
, unsigned long size
, struct text_stat
*stats
)
/* Start from all-zero counters. */
23 memset(stats
, 0, sizeof(*stats
));
/* Visit every byte of the buffer in order. */
25 for (i
= 0; i
< size
; i
++) {
/* unsigned char keeps the byte value in 0..255 even where plain char is signed. */
26 unsigned char c
= buf
[i
];
/*
 * One-byte lookahead for a line feed; the i+1 < size test keeps the
 * read inside the buffer.
 */
29 if (i
+1 < size
&& buf
[i
+1] == '\n')
39 stats
->nonprintable
++;
42 /* BS, HT, ESC and FF */
43 case '\b': case '\t': case '\033': case '\014':
47 stats
->nonprintable
++;
/*
 * is_binary - binary-versus-text heuristic over statistics that were
 * already collected in *stats.
 *
 * size:  supplied by the callers but not referenced in the lines
 *        visible here.
 * stats: only read in the visible lines; just the printable and
 *        nonprintable members are consulted.
 *
 * NOTE(review): the statements that produce the return value are
 * missing from this listing.  Presumably the visible test yields a
 * non-zero ("binary") result and the fall-through yields 0 -- confirm
 * against the complete file.
 */
56 * The same heuristics as diff.c::mmfile_is_binary()
58 static int is_binary(unsigned long size
, struct text_stat
*stats
)
/*
 * printable >> 7 is printable / 128, so this test fires once the
 * nonprintable count exceeds 1/128th of the printable count.
 */
61 if ((stats
->printable
>> 7) < stats
->nonprintable
)
64 * Other heuristics? Average line length might be relevant,
65 * as might LF vs CR vs CRLF counts..
67 * NOTE! It might be normal to have a low ratio of CRLF to LF
68 * (somebody starts with a LF-only file and edits it with an editor
69 * that adds CRLF only to lines that are added..). But do we
70 * want to support CR-only? Probably not.
/*
 * convert_to_git - check-in side of the automatic CRLF conversion
 * described in the comments below.
 *
 * path:  pathname of the content; not referenced in the visible lines
 *        (see the FIXME about filename-based conversions).
 * bufp:  pointer to the caller's buffer pointer; presumably updated
 *        when a converted buffer is produced -- confirm.
 * sizep: pointer to the caller's size; presumably updated together
 *        with *bufp -- confirm.
 *
 * Per the comment ahead of the allocation, "true" is returned when a
 * new buffer was allocated and handed to the caller.
 *
 * NOTE(review): many statements are missing from this listing: the
 * declarations of buffer and nbuf, the loads from *bufp and *sizep,
 * the bodies of the visible if tests, the copy loop and every return
 * statement.  Presumably each failed check returns 0 and leaves the
 * caller's buffer untouched -- confirm against the complete file.
 * Who frees the old and the new buffer is likewise not visible here.
 */
75 int convert_to_git(const char *path
, char **bufp
, unsigned long *sizep
)
78 unsigned long size
, nsize
;
79 struct text_stat stats
;
82 * FIXME! Other pluggable conversions should go here,
83 * based on filename patterns. Right now we just do the
84 * stupid auto-CRLF one.
/* Collect the CR/LF and printable/nonprintable tallies for the checks below. */
94 gather_stats(buffer
, size
, &stats
);
96 /* No CR? Nothing to convert, regardless. */
101 * We're currently not going to even try to convert stuff
102 * that has bare CR characters. Does anybody do that crazy
/* cr differing from crlf means some CR was not counted as part of a CR-LF pair. */
105 if (stats
.cr
!= stats
.crlf
)
109 * And add some heuristics for binary vs text, of course...
111 if (is_binary(size
, &stats
))
115 * Ok, allocate a new buffer, fill it in, and return true
116 * to let the caller know that we switched buffers on it.
/*
 * The result is sized as the input minus one byte per counted CR-LF
 * pair.  xmalloc() is defined elsewhere; presumably it does not
 * return NULL on failure -- TODO confirm.
 */
118 nsize
= size
- stats
.crlf
;
119 nbuf
= xmalloc(nsize
);
/* Fetch the next input byte and advance the read pointer. */
123 unsigned char c
= *buffer
++;
131 int convert_to_working_tree(const char *path
, char **bufp
, unsigned long *sizep
)
134 unsigned long size
, nsize
;
135 struct text_stat stats
;
139 * FIXME! Other pluggable conversions should go here,
140 * based on filename patterns. Right now we just do the
141 * stupid auto-CRLF one.
151 gather_stats(buffer
, size
, &stats
);
153 /* No LF? Nothing to convert, regardless. */
157 /* Was it already in CRLF format? */
158 if (stats
.lf
== stats
.crlf
)
161 /* If we have any bare CR characters, we're not going to touch it */
162 if (stats
.cr
!= stats
.crlf
)
165 if (is_binary(size
, &stats
))
169 * Ok, allocate a new buffer, fill it in, and return true
170 * to let the caller know that we switched buffers on it.
172 nsize
= size
+ stats
.lf
- stats
.crlf
;
173 nbuf
= xmalloc(nsize
);
178 unsigned char c
= *buffer
++;
179 if (c
== '\n' && last
!= '\r')