5 * convert.c - convert a file when checking it out and checking it in.
7 * This should use the pathname to decide on whether it wants to do some
8 * more interesting conversions (automatic gzip/unzip, general format
9 * conversions etc etc), but by default it just does automatic CRLF<->LF
10 * translation when the "auto_crlf" option is set.
13 #define CRLF_GUESS (-1)
19 /* CR, LF and CRLF counts */
20 unsigned cr
, lf
, crlf
;
22 /* These are just approximations! */
23 unsigned printable
, nonprintable
;
/*
 * gather_stats: classify the bytes of buf[0..size) into the counters of
 * *stats.
 *
 * buf   - bytes to scan; read only, indexed as buf[i].
 * size  - number of bytes in buf.
 * stats - output; cleared with memset() before the scan starts.
 *
 * NOTE(review): this listing drops several statements of the function
 * (the increments of cr/lf/crlf/printable and the switch header are not
 * visible), so only the parts shown below are described.
 */
26 static void gather_stats(const char *buf
, unsigned long size
, struct text_stat
*stats
)
30 memset(stats
, 0, sizeof(*stats
));
32 for (i
= 0; i
< size
; i
++) {
33 unsigned char c
= buf
[i
];
/* Peek at the next byte; "i+1 < size" keeps buf[i+1] inside the buffer. */
36 if (i
+1 < size
&& buf
[i
+1] == '\n')
46 stats
->nonprintable
++;
49 /* BS, HT, ESC and FF */
50 case '\b': case '\t': case '\033': case '\014':
54 stats
->nonprintable
++;
63 * The same heuristics as diff.c::mmfile_is_binary()
/*
 * is_binary: text-vs-binary heuristic over previously gathered statistics.
 *
 * size  - size of the examined content (no use of it is visible in this
 *         listing).
 * stats - counters filled in by gather_stats().
 *
 * The visible test compares printable/128 (printable >> 7) with the
 * nonprintable count, so it fires once nonprintable bytes outnumber
 * 1/128th of the printable ones.  The return statements are not visible
 * here; callers use the result as a boolean ("if (is_binary(...))"), so
 * presumably non-zero means binary -- TODO confirm.
 */
65 static int is_binary(unsigned long size
, struct text_stat
*stats
)
68 if ((stats
->printable
>> 7) < stats
->nonprintable
)
71 * Other heuristics? Average line length might be relevant,
72 * as might LF vs CR vs CRLF counts..
74 * NOTE! It might be normal to have a low ratio of CRLF to LF
75 * (somebody starts with a LF-only file and edits it with an editor
76 * that adds CRLF only to lines that are added..). But do we
77 * want to support CR-only? Probably not.
/*
 * crlf_to_git: CRLF -> LF conversion, called from convert_to_git().
 *
 * path   - pathname of the content (no use of it is visible in this
 *          listing).
 * src    - input bytes; consumed through "*src++" in the copy code.
 * sizep  - pointer to the content size (its reads and writes are not
 *          visible here; presumably *sizep is loaded into size and later
 *          updated to nsize -- TODO confirm).
 * action - a CRLF_* action code; CRLF_BINARY and CRLF_GUESS are tested.
 *
 * Visible flow:
 *   - an early test on action == CRLF_BINARY, or action == CRLF_GUESS
 *     with auto_crlf unset (the statement it guards is not visible);
 *   - gather_stats() over the input;
 *   - for CRLF_GUESS only: a check for bare CRs (stats.cr != stats.crlf)
 *     and a check for binary content via is_binary();
 *   - the output size is size - stats.crlf, i.e. one byte less per CRLF
 *     pair, and the output block comes from xmalloc(nsize);
 *   - two copy variants: one for CRLF_GUESS and one for the other
 *     actions, the latter looking ahead one byte after a '\r'.
 *
 * NOTE(review): in the second copy variant c is read through src, but the
 * lookahead tests *buffer -- the block just obtained from xmalloc() --
 * instead of *src.  That inspects the output block rather than the next
 * input byte and looks like a bug (expected: *src == '\n').  Confirm
 * against the complete function and fix.
 *
 * NOTE(review): the function is declared to return char *, so the
 * "return true" wording in the allocation comment below looks stale;
 * presumably the new buffer is what gets returned -- confirm.
 *
 * NOTE(review): "1 < size" only guards the lookahead if size counts the
 * bytes still to be copied; the statement that decrements it is not
 * visible in this listing.
 */
82 static char *crlf_to_git(const char *path
, const char *src
, unsigned long *sizep
, int action
)
85 unsigned long size
, nsize
;
86 struct text_stat stats
;
88 if ((action
== CRLF_BINARY
) || (action
== CRLF_GUESS
&& !auto_crlf
))
95 gather_stats(src
, size
, &stats
);
97 /* No CR? Nothing to convert, regardless. */
101 if (action
== CRLF_GUESS
) {
103 * We're currently not going to even try to convert stuff
104 * that has bare CR characters. Does anybody do that crazy
107 if (stats
.cr
!= stats
.crlf
)
111 * And add some heuristics for binary vs text, of course...
113 if (is_binary(size
, &stats
))
118 * Ok, allocate a new buffer, fill it in, and return true
119 * to let the caller know that we switched buffers on it.
121 nsize
= size
- stats
.crlf
;
122 buffer
= xmalloc(nsize
);
126 if (action
== CRLF_GUESS
) {
128 * If we guessed, we already know we rejected a file with
129 * lone CR, and we can strip a CR without looking at what
133 unsigned char c
= *src
++;
139 unsigned char c
= *src
++;
140 if (! (c
== '\r' && (1 < size
&& *buffer
== '\n')))
/*
 * crlf_to_worktree: LF -> CRLF conversion, called from
 * convert_to_working_tree().
 *
 * path   - pathname of the content (no use of it is visible in this
 *          listing).
 * src    - input bytes; consumed through "*src++".
 * sizep  - pointer to the content size (its reads and writes are not
 *          visible here; presumably updated to nsize -- TODO confirm).
 * action - a CRLF_* action code.
 *
 * Visible flow:
 *   - an early test on action being CRLF_BINARY or CRLF_INPUT, or
 *     CRLF_GUESS with auto_crlf <= 0 (the guarded statement is not
 *     visible);
 *   - gather_stats() over the input;
 *   - a test for content that is already all-CRLF (stats.lf == stats.crlf);
 *   - for CRLF_GUESS only: checks for bare CRs and for binary content;
 *   - the output size is size + stats.lf - stats.crlf, i.e. one extra
 *     byte for every LF that is not already part of a CRLF pair, and the
 *     output block comes from xmalloc(nsize).
 *
 * NOTE(review): the function is declared to return char *, so the
 * "return true" wording in the allocation comment below looks stale;
 * presumably the new buffer is what gets returned -- confirm.
 */
148 static char *crlf_to_worktree(const char *path
, const char *src
, unsigned long *sizep
, int action
)
151 unsigned long size
, nsize
;
152 struct text_stat stats
;
155 if ((action
== CRLF_BINARY
) || (action
== CRLF_INPUT
) ||
156 (action
== CRLF_GUESS
&& auto_crlf
<= 0))
163 gather_stats(src
, size
, &stats
);
165 /* No LF? Nothing to convert, regardless. */
169 /* Was it already in CRLF format? */
170 if (stats
.lf
== stats
.crlf
)
173 if (action
== CRLF_GUESS
) {
174 /* If we have any bare CR characters, we're not going to touch it */
175 if (stats
.cr
!= stats
.crlf
)
178 if (is_binary(size
, &stats
))
183 * Ok, allocate a new buffer, fill it in, and return true
184 * to let the caller know that we switched buffers on it.
186 nsize
= size
+ stats
.lf
- stats
.crlf
;
187 buffer
= xmalloc(nsize
);
193 unsigned char c
= *src
++;
/*
 * A LF whose predecessor was not a CR is a bare LF; presumably a CR is
 * emitted in front of it (the emitting statement is not visible here).
 */
194 if (c
== '\n' && last
!= '\r')
/*
 * setup_convert_check: prepare *check for a lookup of the "crlf"
 * attribute.
 *
 * check - attribute check slot; its attr member is pointed at the
 *         attribute object returned by git_attr("crlf", 4).
 *
 * The attribute handle lives in a function-local static.  The second
 * argument of git_attr() is presumably the length of the name -- TODO
 * confirm.  NOTE(review): whether the lookup is guarded so that it runs
 * only once cannot be told from this listing (a source line is missing
 * just before the git_attr() call).
 */
203 static void setup_convert_check(struct git_attr_check
*check
)
205 static struct git_attr
*attr_crlf
;
208 attr_crlf
= git_attr("crlf", 4);
209 check
->attr
= attr_crlf
;
/*
 * git_path_check_crlf: translate the looked-up attribute value in
 * check->value into an int result.
 *
 * path  - pathname being checked (no use of it is visible in this
 *         listing).
 * check - attribute check whose value member is inspected.
 *
 * The value is tested in this order: ATTR_TRUE, ATTR_FALSE, ATTR_UNSET,
 * then a string comparison against "input".  The return statements are
 * not visible here; since both callers pass the result on as the
 * "action" argument of the crlf_to_*() functions, each branch presumably
 * yields one of the CRLF_* action codes -- TODO confirm.
 */
212 static int git_path_check_crlf(const char *path
, struct git_attr_check
*check
)
214 const char *value
= check
->value
;
216 if (ATTR_TRUE(value
))
218 else if (ATTR_FALSE(value
))
220 else if (ATTR_UNSET(value
))
222 else if (!strcmp(value
, "input"))
/*
 * convert_to_git: entry point for the conversion done by crlf_to_git().
 *
 * path  - pathname used for the attribute lookup.
 * src   - content to convert.
 * sizep - pointer to the content size, handed through to crlf_to_git().
 *
 * The action starts out as CRLF_GUESS.  When git_checkattr() returns
 * zero for the one-element check array, the action is replaced by what
 * git_path_check_crlf() derives from the looked-up attribute.  The
 * result of crlf_to_git() is returned unchanged.
 */
227 char *convert_to_git(const char *path
, const char *src
, unsigned long *sizep
)
229 struct git_attr_check check
[1];
230 int crlf
= CRLF_GUESS
;
232 setup_convert_check(check
);
233 if (!git_checkattr(path
, 1, check
)) {
234 crlf
= git_path_check_crlf(path
, check
);
236 return crlf_to_git(path
, src
, sizep
, crlf
);
/*
 * convert_to_working_tree: entry point for the conversion done by
 * crlf_to_worktree().
 *
 * path  - pathname used for the attribute lookup.
 * src   - content to convert.
 * sizep - pointer to the content size, handed through to
 *         crlf_to_worktree().
 *
 * The action starts out as CRLF_GUESS.  When git_checkattr() returns
 * zero for the one-element check array, the action is replaced by what
 * git_path_check_crlf() derives from the looked-up attribute.  The
 * result of crlf_to_worktree() is returned unchanged.
 */
239 char *convert_to_working_tree(const char *path
, const char *src
, unsigned long *sizep
)
241 struct git_attr_check check
[1];
242 int crlf
= CRLF_GUESS
;
244 setup_convert_check(check
);
245 if (!git_checkattr(path
, 1, check
)) {
246 crlf
= git_path_check_crlf(path
, check
);
248 return crlf_to_worktree(path
, src
, sizep
, crlf
);