3 #include "run-command.h"
6 * convert.c - convert a file when checking it out and checking it in.
8 * This should use the pathname to decide on whether it wants to do some
9 * more interesting conversions (automatic gzip/unzip, general format
10 * conversions etc etc), but by default it just does automatic CRLF<->LF
11 * translation when the "auto_crlf" option is set.
14 #define CRLF_GUESS (-1)
20 /* CR, LF and CRLF counts */
21 unsigned cr
, lf
, crlf
;
23 /* These are just approximations! */
24 unsigned printable
, nonprintable
;
27 static void gather_stats(const char *buf
, unsigned long size
, struct text_stat
*stats
)
31 memset(stats
, 0, sizeof(*stats
));
33 for (i
= 0; i
< size
; i
++) {
34 unsigned char c
= buf
[i
];
37 if (i
+1 < size
&& buf
[i
+1] == '\n')
47 stats
->nonprintable
++;
50 /* BS, HT, ESC and FF */
51 case '\b': case '\t': case '\033': case '\014':
55 stats
->nonprintable
++;
64 * The same heuristics as diff.c::mmfile_is_binary()
66 static int is_binary(unsigned long size
, struct text_stat
*stats
)
69 if ((stats
->printable
>> 7) < stats
->nonprintable
)
72 * Other heuristics? Average line length might be relevant,
73 * as might LF vs CR vs CRLF counts..
75 * NOTE! It might be normal to have a low ratio of CRLF to LF
76 * (somebody starts with a LF-only file and edits it with an editor
77 * that adds CRLF only to lines that are added..). But do we
78 * want to support CR-only? Probably not.
83 static char *crlf_to_git(const char *path
, const char *src
, unsigned long *sizep
, int action
)
86 unsigned long size
, nsize
;
87 struct text_stat stats
;
89 if ((action
== CRLF_BINARY
) || (action
== CRLF_GUESS
&& !auto_crlf
))
96 gather_stats(src
, size
, &stats
);
98 /* No CR? Nothing to convert, regardless. */
102 if (action
== CRLF_GUESS
) {
104 * We're currently not going to even try to convert stuff
105 * that has bare CR characters. Does anybody do that crazy
108 if (stats
.cr
!= stats
.crlf
)
112 * And add some heuristics for binary vs text, of course...
114 if (is_binary(size
, &stats
))
119 * Ok, allocate a new buffer, fill it in, and return it
120 * to let the caller know that we switched buffers.
122 nsize
= size
- stats
.crlf
;
123 buffer
= xmalloc(nsize
);
127 if (action
== CRLF_GUESS
) {
129 * If we guessed, we already know we rejected a file with
130 * lone CR, and we can strip a CR without looking at what
134 unsigned char c
= *src
++;
140 unsigned char c
= *src
++;
141 if (! (c
== '\r' && (1 < size
&& *src
== '\n')))
149 static char *crlf_to_worktree(const char *path
, const char *src
, unsigned long *sizep
, int action
)
152 unsigned long size
, nsize
;
153 struct text_stat stats
;
156 if ((action
== CRLF_BINARY
) || (action
== CRLF_INPUT
) ||
157 (action
== CRLF_GUESS
&& auto_crlf
<= 0))
164 gather_stats(src
, size
, &stats
);
166 /* No LF? Nothing to convert, regardless. */
170 /* Was it already in CRLF format? */
171 if (stats
.lf
== stats
.crlf
)
174 if (action
== CRLF_GUESS
) {
175 /* If we have any bare CR characters, we're not going to touch it */
176 if (stats
.cr
!= stats
.crlf
)
179 if (is_binary(size
, &stats
))
184 * Ok, allocate a new buffer, fill it in, and return it
185 * to let the caller know that we switched buffers.
187 nsize
= size
+ stats
.lf
- stats
.crlf
;
188 buffer
= xmalloc(nsize
);
194 unsigned char c
= *src
++;
195 if (c
== '\n' && last
!= '\r')
204 static void setup_convert_check(struct git_attr_check
*check
)
206 static struct git_attr
*attr_crlf
;
207 static struct git_attr
*attr_ident
;
210 attr_crlf
= git_attr("crlf", 4);
211 attr_ident
= git_attr("ident", 5);
213 check
[0].attr
= attr_crlf
;
214 check
[1].attr
= attr_ident
;
217 static int count_ident(const char *cp
, unsigned long size
)
220 * "$ident: 0000000000000000000000000000000000000000 $" <=> "$ident$"
232 if (memcmp("ident", cp
, 5))
243 * "$ident: ... "; scan up to the closing dollar sign and discard.
257 static char *ident_to_git(const char *path
, const char *src
, unsigned long *sizep
, int ident
)
266 cnt
= count_ident(src
, size
);
271 for (dst
= buf
; size
; size
--) {
274 if ((ch
== '$') && (6 <= size
) &&
275 !memcmp("ident:", src
, 6)) {
276 unsigned long rem
= size
- 6;
277 const char *cp
= src
+ 6;
286 memcpy(dst
, "ident$", 6);
297 static char *ident_to_worktree(const char *path
, const char *src
, unsigned long *sizep
, int ident
)
302 unsigned char sha1
[20];
308 cnt
= count_ident(src
, size
);
312 hash_sha1_file(src
, size
, "blob", sha1
);
313 buf
= xmalloc(size
+ cnt
* 43);
315 for (dst
= buf
; size
; size
--) {
319 if ((ch
!= '$') || (size
< 6) || memcmp("ident", src
, 5))
323 /* discard up to but not including the closing $ */
324 unsigned long rem
= size
- 6;
335 } else if (src
[5] == '$')
340 memcpy(dst
, "ident: ", 7);
342 memcpy(dst
, sha1_to_hex(sha1
), 40);
355 static int git_path_check_crlf(const char *path
, struct git_attr_check
*check
)
357 const char *value
= check
->value
;
359 if (ATTR_TRUE(value
))
361 else if (ATTR_FALSE(value
))
363 else if (ATTR_UNSET(value
))
365 else if (!strcmp(value
, "input"))
370 static int git_path_check_ident(const char *path
, struct git_attr_check
*check
)
372 const char *value
= check
->value
;
374 return !!ATTR_TRUE(value
);
377 char *convert_to_git(const char *path
, const char *src
, unsigned long *sizep
)
379 struct git_attr_check check
[2];
380 int crlf
= CRLF_GUESS
;
384 setup_convert_check(check
);
385 if (!git_checkattr(path
, ARRAY_SIZE(check
), check
)) {
386 crlf
= git_path_check_crlf(path
, check
+ 0);
387 ident
= git_path_check_ident(path
, check
+ 1);
390 buf
= crlf_to_git(path
, src
, sizep
, crlf
);
392 buf2
= ident_to_git(path
, buf
? buf
: src
, sizep
, ident
);
401 char *convert_to_working_tree(const char *path
, const char *src
, unsigned long *sizep
)
403 struct git_attr_check check
[2];
404 int crlf
= CRLF_GUESS
;
408 setup_convert_check(check
);
409 if (!git_checkattr(path
, ARRAY_SIZE(check
), check
)) {
410 crlf
= git_path_check_crlf(path
, check
+ 0);
411 ident
= git_path_check_ident(path
, check
+ 1);
414 buf
= ident_to_worktree(path
, src
, sizep
, ident
);
416 buf2
= crlf_to_worktree(path
, buf
? buf
: src
, sizep
, crlf
);