5 * convert.c - convert a file when checking it out and checking it in.
7 * This should use the pathname to decide on whether it wants to do some
8 * more interesting conversions (automatic gzip/unzip, general format
9 * conversions etc etc), but by default it just does automatic CRLF<->LF
10 * translation when the "auto_crlf" option is set.
13 #define CRLF_GUESS (-1)
19 /* CR, LF and CRLF counts */
20 unsigned cr
, lf
, crlf
;
22 /* These are just approximations! */
23 unsigned printable
, nonprintable
;
26 static void gather_stats(const char *buf
, unsigned long size
, struct text_stat
*stats
)
30 memset(stats
, 0, sizeof(*stats
));
32 for (i
= 0; i
< size
; i
++) {
33 unsigned char c
= buf
[i
];
36 if (i
+1 < size
&& buf
[i
+1] == '\n')
46 stats
->nonprintable
++;
49 /* BS, HT, ESC and FF */
50 case '\b': case '\t': case '\033': case '\014':
54 stats
->nonprintable
++;
63 * The same heuristics as diff.c::mmfile_is_binary()
65 static int is_binary(unsigned long size
, struct text_stat
*stats
)
68 if ((stats
->printable
>> 7) < stats
->nonprintable
)
71 * Other heuristics? Average line length might be relevant,
72 * as might LF vs CR vs CRLF counts..
74 * NOTE! It might be normal to have a low ratio of CRLF to LF
75 * (somebody starts with a LF-only file and edits it with an editor
76 * that adds CRLF only to lines that are added..). But do we
77 * want to support CR-only? Probably not.
82 static int crlf_to_git(const char *path
, char **bufp
, unsigned long *sizep
, int action
)
85 unsigned long size
, nsize
;
86 struct text_stat stats
;
88 if ((action
== CRLF_BINARY
) || (action
== CRLF_GUESS
&& !auto_crlf
))
96 gather_stats(buffer
, size
, &stats
);
98 /* No CR? Nothing to convert, regardless. */
102 if (action
== CRLF_GUESS
) {
104 * We're currently not going to even try to convert stuff
105 * that has bare CR characters. Does anybody do that crazy
108 if (stats
.cr
!= stats
.crlf
)
112 * And add some heuristics for binary vs text, of course...
114 if (is_binary(size
, &stats
))
119 * Ok, allocate a new buffer, fill it in, and return true
120 * to let the caller know that we switched buffers on it.
122 nsize
= size
- stats
.crlf
;
123 nbuf
= xmalloc(nsize
);
127 if (action
== CRLF_GUESS
) {
129 * If we guessed, we already know we rejected a file with
130 * lone CR, and we can strip a CR without looking at what
134 unsigned char c
= *buffer
++;
140 unsigned char c
= *buffer
++;
141 if (! (c
== '\r' && (1 < size
&& *buffer
== '\n')))
149 static int crlf_to_worktree(const char *path
, char **bufp
, unsigned long *sizep
, int action
)
152 unsigned long size
, nsize
;
153 struct text_stat stats
;
156 if ((action
== CRLF_BINARY
) || (action
== CRLF_INPUT
) ||
157 (action
== CRLF_GUESS
&& auto_crlf
<= 0))
165 gather_stats(buffer
, size
, &stats
);
167 /* No LF? Nothing to convert, regardless. */
171 /* Was it already in CRLF format? */
172 if (stats
.lf
== stats
.crlf
)
175 if (action
== CRLF_GUESS
) {
176 /* If we have any bare CR characters, we're not going to touch it */
177 if (stats
.cr
!= stats
.crlf
)
180 if (is_binary(size
, &stats
))
185 * Ok, allocate a new buffer, fill it in, and return true
186 * to let the caller know that we switched buffers on it.
188 nsize
= size
+ stats
.lf
- stats
.crlf
;
189 nbuf
= xmalloc(nsize
);
194 unsigned char c
= *buffer
++;
195 if (c
== '\n' && last
!= '\r')
204 static void setup_crlf_check(struct git_attr_check
*check
)
206 static struct git_attr
*attr_crlf
;
209 attr_crlf
= git_attr("crlf", 4);
210 check
->attr
= attr_crlf
;
213 static int git_path_check_crlf(const char *path
)
215 struct git_attr_check attr_crlf_check
;
217 setup_crlf_check(&attr_crlf_check
);
219 if (!git_checkattr(path
, 1, &attr_crlf_check
)) {
220 const char *value
= attr_crlf_check
.value
;
221 if (ATTR_TRUE(value
))
223 else if (ATTR_FALSE(value
))
225 else if (ATTR_UNSET(value
))
227 else if (!strcmp(value
, "input"))
234 int convert_to_git(const char *path
, char **bufp
, unsigned long *sizep
)
236 return crlf_to_git(path
, bufp
, sizep
, git_path_check_crlf(path
));
239 int convert_to_working_tree(const char *path
, char **bufp
, unsigned long *sizep
)
241 return crlf_to_worktree(path
, bufp
, sizep
, git_path_check_crlf(path
));