11 #include "sha1-array.h"
16 #define FOREACH_MSG_ID(FUNC) \
18 FUNC(NUL_IN_HEADER, FATAL) \
19 FUNC(UNTERMINATED_HEADER, FATAL) \
21 FUNC(BAD_DATE, ERROR) \
22 FUNC(BAD_DATE_OVERFLOW, ERROR) \
23 FUNC(BAD_EMAIL, ERROR) \
24 FUNC(BAD_NAME, ERROR) \
25 FUNC(BAD_OBJECT_SHA1, ERROR) \
26 FUNC(BAD_PARENT_SHA1, ERROR) \
27 FUNC(BAD_TAG_OBJECT, ERROR) \
28 FUNC(BAD_TIMEZONE, ERROR) \
29 FUNC(BAD_TREE, ERROR) \
30 FUNC(BAD_TREE_SHA1, ERROR) \
31 FUNC(BAD_TYPE, ERROR) \
32 FUNC(DUPLICATE_ENTRIES, ERROR) \
33 FUNC(MISSING_AUTHOR, ERROR) \
34 FUNC(MISSING_COMMITTER, ERROR) \
35 FUNC(MISSING_EMAIL, ERROR) \
36 FUNC(MISSING_GRAFT, ERROR) \
37 FUNC(MISSING_NAME_BEFORE_EMAIL, ERROR) \
38 FUNC(MISSING_OBJECT, ERROR) \
39 FUNC(MISSING_PARENT, ERROR) \
40 FUNC(MISSING_SPACE_BEFORE_DATE, ERROR) \
41 FUNC(MISSING_SPACE_BEFORE_EMAIL, ERROR) \
42 FUNC(MISSING_TAG, ERROR) \
43 FUNC(MISSING_TAG_ENTRY, ERROR) \
44 FUNC(MISSING_TAG_OBJECT, ERROR) \
45 FUNC(MISSING_TREE, ERROR) \
46 FUNC(MISSING_TYPE, ERROR) \
47 FUNC(MISSING_TYPE_ENTRY, ERROR) \
48 FUNC(MULTIPLE_AUTHORS, ERROR) \
49 FUNC(TAG_OBJECT_NOT_TAG, ERROR) \
50 FUNC(TREE_NOT_SORTED, ERROR) \
51 FUNC(UNKNOWN_TYPE, ERROR) \
52 FUNC(ZERO_PADDED_DATE, ERROR) \
54 FUNC(BAD_FILEMODE, WARN) \
55 FUNC(EMPTY_NAME, WARN) \
56 FUNC(FULL_PATHNAME, WARN) \
58 FUNC(HAS_DOTDOT, WARN) \
59 FUNC(HAS_DOTGIT, WARN) \
60 FUNC(NULL_SHA1, WARN) \
61 FUNC(ZERO_PADDED_FILEMODE, WARN) \
62 FUNC(NUL_IN_COMMIT, WARN) \
63 /* infos (reported as warnings, but ignored by default) */ \
64 FUNC(BAD_TAG_NAME, INFO) \
65 FUNC(MISSING_TAGGER_ENTRY, INFO)
67 #define MSG_ID(id, msg_type) FSCK_MSG_##id,
69 FOREACH_MSG_ID(MSG_ID
)
75 #define MSG_ID(id, msg_type) { STR(id), NULL, FSCK_##msg_type },
77 const char *id_string
;
78 const char *downcased
;
80 } msg_id_info
[FSCK_MSG_MAX
+ 1] = {
81 FOREACH_MSG_ID(MSG_ID
)
86 static int parse_msg_id(const char *text
)
90 if (!msg_id_info
[0].downcased
) {
91 /* convert id_string to lower case, without underscores. */
92 for (i
= 0; i
< FSCK_MSG_MAX
; i
++) {
93 const char *p
= msg_id_info
[i
].id_string
;
95 char *q
= xmalloc(len
);
97 msg_id_info
[i
].downcased
= q
;
102 *(q
)++ = tolower(*(p
)++);
107 for (i
= 0; i
< FSCK_MSG_MAX
; i
++)
108 if (!strcmp(text
, msg_id_info
[i
].downcased
))
114 static int fsck_msg_type(enum fsck_msg_id msg_id
,
115 struct fsck_options
*options
)
119 assert(msg_id
>= 0 && msg_id
< FSCK_MSG_MAX
);
121 if (options
->msg_type
)
122 msg_type
= options
->msg_type
[msg_id
];
124 msg_type
= msg_id_info
[msg_id
].msg_type
;
125 if (options
->strict
&& msg_type
== FSCK_WARN
)
126 msg_type
= FSCK_ERROR
;
132 static void init_skiplist(struct fsck_options
*options
, const char *path
)
134 static struct sha1_array skiplist
= SHA1_ARRAY_INIT
;
137 unsigned char sha1
[20];
139 if (options
->skiplist
)
140 sorted
= options
->skiplist
->sorted
;
143 options
->skiplist
= &skiplist
;
146 fd
= open(path
, O_RDONLY
);
148 die("Could not open skip list: %s", path
);
150 int result
= read_in_full(fd
, buffer
, sizeof(buffer
));
152 die_errno("Could not read '%s'", path
);
155 if (get_sha1_hex(buffer
, sha1
) || buffer
[40] != '\n')
156 die("Invalid SHA-1: %s", buffer
);
157 sha1_array_append(&skiplist
, sha1
);
158 if (sorted
&& skiplist
.nr
> 1 &&
159 hashcmp(skiplist
.sha1
[skiplist
.nr
- 2],
169 static int parse_msg_type(const char *str
)
171 if (!strcmp(str
, "error"))
173 else if (!strcmp(str
, "warn"))
175 else if (!strcmp(str
, "ignore"))
178 die("Unknown fsck message type: '%s'", str
);
181 int is_valid_msg_type(const char *msg_id
, const char *msg_type
)
183 if (parse_msg_id(msg_id
) < 0)
185 parse_msg_type(msg_type
);
189 void fsck_set_msg_type(struct fsck_options
*options
,
190 const char *msg_id
, const char *msg_type
)
192 int id
= parse_msg_id(msg_id
), type
;
195 die("Unhandled message id: %s", msg_id
);
196 type
= parse_msg_type(msg_type
);
198 if (type
!= FSCK_ERROR
&& msg_id_info
[id
].msg_type
== FSCK_FATAL
)
199 die("Cannot demote %s to %s", msg_id
, msg_type
);
201 if (!options
->msg_type
) {
204 ALLOC_ARRAY(msg_type
, FSCK_MSG_MAX
);
205 for (i
= 0; i
< FSCK_MSG_MAX
; i
++)
206 msg_type
[i
] = fsck_msg_type(i
, options
);
207 options
->msg_type
= msg_type
;
210 options
->msg_type
[id
] = type
;
213 void fsck_set_msg_types(struct fsck_options
*options
, const char *values
)
215 char *buf
= xstrdup(values
), *to_free
= buf
;
219 int len
= strcspn(buf
, " ,|"), equal
;
229 equal
< len
&& buf
[equal
] != '=' && buf
[equal
] != ':';
231 buf
[equal
] = tolower(buf
[equal
]);
234 if (!strcmp(buf
, "skiplist")) {
236 die("skiplist requires a path");
237 init_skiplist(options
, buf
+ equal
+ 1);
243 die("Missing '=': '%s'", buf
);
245 fsck_set_msg_type(options
, buf
, buf
+ equal
+ 1);
251 static void append_msg_id(struct strbuf
*sb
, const char *msg_id
)
254 char c
= *(msg_id
)++;
259 strbuf_addch(sb
, tolower(c
));
262 strbuf_addch(sb
, *(msg_id
)++);
266 strbuf_addstr(sb
, ": ");
269 __attribute__((format (printf
, 4, 5)))
270 static int report(struct fsck_options
*options
, struct object
*object
,
271 enum fsck_msg_id id
, const char *fmt
, ...)
274 struct strbuf sb
= STRBUF_INIT
;
275 int msg_type
= fsck_msg_type(id
, options
), result
;
277 if (msg_type
== FSCK_IGNORE
)
280 if (options
->skiplist
&& object
&&
281 sha1_array_lookup(options
->skiplist
, object
->oid
.hash
) >= 0)
284 if (msg_type
== FSCK_FATAL
)
285 msg_type
= FSCK_ERROR
;
286 else if (msg_type
== FSCK_INFO
)
287 msg_type
= FSCK_WARN
;
289 append_msg_id(&sb
, msg_id_info
[id
].id_string
);
292 strbuf_vaddf(&sb
, fmt
, ap
);
293 result
= options
->error_func(object
, msg_type
, sb
.buf
);
300 static int fsck_walk_tree(struct tree
*tree
, void *data
, struct fsck_options
*options
)
302 struct tree_desc desc
;
303 struct name_entry entry
;
306 if (parse_tree(tree
))
309 init_tree_desc(&desc
, tree
->buffer
, tree
->size
);
310 while (tree_entry(&desc
, &entry
)) {
313 if (S_ISGITLINK(entry
.mode
))
315 if (S_ISDIR(entry
.mode
))
316 result
= options
->walk(&lookup_tree(entry
.oid
->hash
)->object
, OBJ_TREE
, data
, options
);
317 else if (S_ISREG(entry
.mode
) || S_ISLNK(entry
.mode
))
318 result
= options
->walk(&lookup_blob(entry
.oid
->hash
)->object
, OBJ_BLOB
, data
, options
);
320 result
= error("in tree %s: entry %s has bad mode %.6o",
321 oid_to_hex(&tree
->object
.oid
), entry
.path
, entry
.mode
);
331 static int fsck_walk_commit(struct commit
*commit
, void *data
, struct fsck_options
*options
)
333 struct commit_list
*parents
;
337 if (parse_commit(commit
))
340 result
= options
->walk((struct object
*)commit
->tree
, OBJ_TREE
, data
, options
);
345 parents
= commit
->parents
;
347 result
= options
->walk((struct object
*)parents
->item
, OBJ_COMMIT
, data
, options
);
352 parents
= parents
->next
;
357 static int fsck_walk_tag(struct tag
*tag
, void *data
, struct fsck_options
*options
)
361 return options
->walk(tag
->tagged
, OBJ_ANY
, data
, options
);
364 int fsck_walk(struct object
*obj
, void *data
, struct fsck_options
*options
)
372 return fsck_walk_tree((struct tree
*)obj
, data
, options
);
374 return fsck_walk_commit((struct commit
*)obj
, data
, options
);
376 return fsck_walk_tag((struct tag
*)obj
, data
, options
);
378 error("Unknown object type for %s", oid_to_hex(&obj
->oid
));
384 * The entries in a tree are ordered in the _path_ order,
385 * which means that a directory entry is ordered by adding
386 * a slash to the end of it.
388 * So a directory called "a" is ordered _after_ a file
389 * called "a.c", because "a/" sorts after "a.c".
391 #define TREE_UNORDERED (-1)
392 #define TREE_HAS_DUPS (-2)
394 static int verify_ordered(unsigned mode1
, const char *name1
, unsigned mode2
, const char *name2
)
396 int len1
= strlen(name1
);
397 int len2
= strlen(name2
);
398 int len
= len1
< len2
? len1
: len2
;
399 unsigned char c1
, c2
;
402 cmp
= memcmp(name1
, name2
, len
);
406 return TREE_UNORDERED
;
409 * Ok, the first <len> characters are the same.
410 * Now we need to order the next one, but turn
411 * a '\0' into a '/' for a directory entry.
417 * git-write-tree used to write out a nonsense tree that has
418 * entries with the same name, one blob and one tree. Make
419 * sure we do not have duplicate entries.
421 return TREE_HAS_DUPS
;
422 if (!c1
&& S_ISDIR(mode1
))
424 if (!c2
&& S_ISDIR(mode2
))
426 return c1
< c2
? 0 : TREE_UNORDERED
;
429 static int fsck_tree(struct tree
*item
, struct fsck_options
*options
)
432 int has_null_sha1
= 0;
433 int has_full_path
= 0;
434 int has_empty_name
= 0;
438 int has_zero_pad
= 0;
439 int has_bad_modes
= 0;
440 int has_dup_entries
= 0;
441 int not_properly_sorted
= 0;
442 struct tree_desc desc
;
446 init_tree_desc(&desc
, item
->buffer
, item
->size
);
454 const struct object_id
*oid
;
456 oid
= tree_entry_extract(&desc
, &name
, &mode
);
458 has_null_sha1
|= is_null_oid(oid
);
459 has_full_path
|= !!strchr(name
, '/');
460 has_empty_name
|= !*name
;
461 has_dot
|= !strcmp(name
, ".");
462 has_dotdot
|= !strcmp(name
, "..");
463 has_dotgit
|= (!strcmp(name
, ".git") ||
464 is_hfs_dotgit(name
) ||
465 is_ntfs_dotgit(name
));
466 has_zero_pad
|= *(char *)desc
.buffer
== '0';
467 update_tree_entry(&desc
);
480 * This is nonstandard, but we had a few of these
481 * early on when we honored the full set of mode
485 if (!options
->strict
)
492 switch (verify_ordered(o_mode
, o_name
, mode
, name
)) {
494 not_properly_sorted
= 1;
510 retval
+= report(options
, &item
->object
, FSCK_MSG_NULL_SHA1
, "contains entries pointing to null sha1");
512 retval
+= report(options
, &item
->object
, FSCK_MSG_FULL_PATHNAME
, "contains full pathnames");
514 retval
+= report(options
, &item
->object
, FSCK_MSG_EMPTY_NAME
, "contains empty pathname");
516 retval
+= report(options
, &item
->object
, FSCK_MSG_HAS_DOT
, "contains '.'");
518 retval
+= report(options
, &item
->object
, FSCK_MSG_HAS_DOTDOT
, "contains '..'");
520 retval
+= report(options
, &item
->object
, FSCK_MSG_HAS_DOTGIT
, "contains '.git'");
522 retval
+= report(options
, &item
->object
, FSCK_MSG_ZERO_PADDED_FILEMODE
, "contains zero-padded file modes");
524 retval
+= report(options
, &item
->object
, FSCK_MSG_BAD_FILEMODE
, "contains bad file modes");
526 retval
+= report(options
, &item
->object
, FSCK_MSG_DUPLICATE_ENTRIES
, "contains duplicate file entries");
527 if (not_properly_sorted
)
528 retval
+= report(options
, &item
->object
, FSCK_MSG_TREE_NOT_SORTED
, "not properly sorted");
532 static int verify_headers(const void *data
, unsigned long size
,
533 struct object
*obj
, struct fsck_options
*options
)
535 const char *buffer
= (const char *)data
;
538 for (i
= 0; i
< size
; i
++) {
541 return report(options
, obj
,
542 FSCK_MSG_NUL_IN_HEADER
,
543 "unterminated header: NUL at offset %ld", i
);
545 if (i
+ 1 < size
&& buffer
[i
+ 1] == '\n')
551 * We did not find double-LF that separates the header
552 * and the body. Not having a body is not a crime but
553 * we do want to see the terminating LF for the last header
556 if (size
&& buffer
[size
- 1] == '\n')
559 return report(options
, obj
,
560 FSCK_MSG_UNTERMINATED_HEADER
, "unterminated header");
563 static int fsck_ident(const char **ident
, struct object
*obj
, struct fsck_options
*options
)
565 const char *p
= *ident
;
568 *ident
= strchrnul(*ident
, '\n');
573 return report(options
, obj
, FSCK_MSG_MISSING_NAME_BEFORE_EMAIL
, "invalid author/committer line - missing space before email");
574 p
+= strcspn(p
, "<>\n");
576 return report(options
, obj
, FSCK_MSG_BAD_NAME
, "invalid author/committer line - bad name");
578 return report(options
, obj
, FSCK_MSG_MISSING_EMAIL
, "invalid author/committer line - missing email");
580 return report(options
, obj
, FSCK_MSG_MISSING_SPACE_BEFORE_EMAIL
, "invalid author/committer line - missing space before email");
582 p
+= strcspn(p
, "<>\n");
584 return report(options
, obj
, FSCK_MSG_BAD_EMAIL
, "invalid author/committer line - bad email");
587 return report(options
, obj
, FSCK_MSG_MISSING_SPACE_BEFORE_DATE
, "invalid author/committer line - missing space before date");
589 if (*p
== '0' && p
[1] != ' ')
590 return report(options
, obj
, FSCK_MSG_ZERO_PADDED_DATE
, "invalid author/committer line - zero-padded date");
591 if (date_overflows(strtoul(p
, &end
, 10)))
592 return report(options
, obj
, FSCK_MSG_BAD_DATE_OVERFLOW
, "invalid author/committer line - date causes integer overflow");
593 if ((end
== p
|| *end
!= ' '))
594 return report(options
, obj
, FSCK_MSG_BAD_DATE
, "invalid author/committer line - bad date");
596 if ((*p
!= '+' && *p
!= '-') ||
602 return report(options
, obj
, FSCK_MSG_BAD_TIMEZONE
, "invalid author/committer line - bad time zone");
607 static int fsck_commit_buffer(struct commit
*commit
, const char *buffer
,
608 unsigned long size
, struct fsck_options
*options
)
610 unsigned char tree_sha1
[20], sha1
[20];
611 struct commit_graft
*graft
;
612 unsigned parent_count
, parent_line_count
= 0, author_count
;
614 const char *buffer_begin
= buffer
;
616 if (verify_headers(buffer
, size
, &commit
->object
, options
))
619 if (!skip_prefix(buffer
, "tree ", &buffer
))
620 return report(options
, &commit
->object
, FSCK_MSG_MISSING_TREE
, "invalid format - expected 'tree' line");
621 if (get_sha1_hex(buffer
, tree_sha1
) || buffer
[40] != '\n') {
622 err
= report(options
, &commit
->object
, FSCK_MSG_BAD_TREE_SHA1
, "invalid 'tree' line format - bad sha1");
627 while (skip_prefix(buffer
, "parent ", &buffer
)) {
628 if (get_sha1_hex(buffer
, sha1
) || buffer
[40] != '\n') {
629 err
= report(options
, &commit
->object
, FSCK_MSG_BAD_PARENT_SHA1
, "invalid 'parent' line format - bad sha1");
636 graft
= lookup_commit_graft(commit
->object
.oid
.hash
);
637 parent_count
= commit_list_count(commit
->parents
);
639 if (graft
->nr_parent
== -1 && !parent_count
)
640 ; /* shallow commit */
641 else if (graft
->nr_parent
!= parent_count
) {
642 err
= report(options
, &commit
->object
, FSCK_MSG_MISSING_GRAFT
, "graft objects missing");
647 if (parent_count
!= parent_line_count
) {
648 err
= report(options
, &commit
->object
, FSCK_MSG_MISSING_PARENT
, "parent objects missing");
654 while (skip_prefix(buffer
, "author ", &buffer
)) {
656 err
= fsck_ident(&buffer
, &commit
->object
, options
);
660 if (author_count
< 1)
661 err
= report(options
, &commit
->object
, FSCK_MSG_MISSING_AUTHOR
, "invalid format - expected 'author' line");
662 else if (author_count
> 1)
663 err
= report(options
, &commit
->object
, FSCK_MSG_MULTIPLE_AUTHORS
, "invalid format - multiple 'author' lines");
666 if (!skip_prefix(buffer
, "committer ", &buffer
))
667 return report(options
, &commit
->object
, FSCK_MSG_MISSING_COMMITTER
, "invalid format - expected 'committer' line");
668 err
= fsck_ident(&buffer
, &commit
->object
, options
);
672 err
= report(options
, &commit
->object
, FSCK_MSG_BAD_TREE
, "could not load commit's tree %s", sha1_to_hex(tree_sha1
));
676 if (memchr(buffer_begin
, '\0', size
)) {
677 err
= report(options
, &commit
->object
, FSCK_MSG_NUL_IN_COMMIT
,
678 "NUL byte in the commit object body");
685 static int fsck_commit(struct commit
*commit
, const char *data
,
686 unsigned long size
, struct fsck_options
*options
)
688 const char *buffer
= data
? data
: get_commit_buffer(commit
, &size
);
689 int ret
= fsck_commit_buffer(commit
, buffer
, size
, options
);
691 unuse_commit_buffer(commit
, buffer
);
695 static int fsck_tag_buffer(struct tag
*tag
, const char *data
,
696 unsigned long size
, struct fsck_options
*options
)
698 unsigned char sha1
[20];
701 char *to_free
= NULL
, *eol
;
702 struct strbuf sb
= STRBUF_INIT
;
707 enum object_type type
;
710 read_sha1_file(tag
->object
.oid
.hash
, &type
, &size
);
712 return report(options
, &tag
->object
,
713 FSCK_MSG_MISSING_TAG_OBJECT
,
714 "cannot read tag object");
716 if (type
!= OBJ_TAG
) {
717 ret
= report(options
, &tag
->object
,
718 FSCK_MSG_TAG_OBJECT_NOT_TAG
,
719 "expected tag got %s",
725 ret
= verify_headers(buffer
, size
, &tag
->object
, options
);
729 if (!skip_prefix(buffer
, "object ", &buffer
)) {
730 ret
= report(options
, &tag
->object
, FSCK_MSG_MISSING_OBJECT
, "invalid format - expected 'object' line");
733 if (get_sha1_hex(buffer
, sha1
) || buffer
[40] != '\n') {
734 ret
= report(options
, &tag
->object
, FSCK_MSG_BAD_OBJECT_SHA1
, "invalid 'object' line format - bad sha1");
740 if (!skip_prefix(buffer
, "type ", &buffer
)) {
741 ret
= report(options
, &tag
->object
, FSCK_MSG_MISSING_TYPE_ENTRY
, "invalid format - expected 'type' line");
744 eol
= strchr(buffer
, '\n');
746 ret
= report(options
, &tag
->object
, FSCK_MSG_MISSING_TYPE
, "invalid format - unexpected end after 'type' line");
749 if (type_from_string_gently(buffer
, eol
- buffer
, 1) < 0)
750 ret
= report(options
, &tag
->object
, FSCK_MSG_BAD_TYPE
, "invalid 'type' value");
755 if (!skip_prefix(buffer
, "tag ", &buffer
)) {
756 ret
= report(options
, &tag
->object
, FSCK_MSG_MISSING_TAG_ENTRY
, "invalid format - expected 'tag' line");
759 eol
= strchr(buffer
, '\n');
761 ret
= report(options
, &tag
->object
, FSCK_MSG_MISSING_TAG
, "invalid format - unexpected end after 'type' line");
764 strbuf_addf(&sb
, "refs/tags/%.*s", (int)(eol
- buffer
), buffer
);
765 if (check_refname_format(sb
.buf
, 0)) {
766 ret
= report(options
, &tag
->object
, FSCK_MSG_BAD_TAG_NAME
,
767 "invalid 'tag' name: %.*s",
768 (int)(eol
- buffer
), buffer
);
774 if (!skip_prefix(buffer
, "tagger ", &buffer
)) {
775 /* early tags do not contain 'tagger' lines; warn only */
776 ret
= report(options
, &tag
->object
, FSCK_MSG_MISSING_TAGGER_ENTRY
, "invalid format - expected 'tagger' line");
781 ret
= fsck_ident(&buffer
, &tag
->object
, options
);
789 static int fsck_tag(struct tag
*tag
, const char *data
,
790 unsigned long size
, struct fsck_options
*options
)
792 struct object
*tagged
= tag
->tagged
;
795 return report(options
, &tag
->object
, FSCK_MSG_BAD_TAG_OBJECT
, "could not load tagged object");
797 return fsck_tag_buffer(tag
, data
, size
, options
);
800 int fsck_object(struct object
*obj
, void *data
, unsigned long size
,
801 struct fsck_options
*options
)
804 return report(options
, obj
, FSCK_MSG_BAD_OBJECT_SHA1
, "no valid object to fsck");
806 if (obj
->type
== OBJ_BLOB
)
808 if (obj
->type
== OBJ_TREE
)
809 return fsck_tree((struct tree
*) obj
, options
);
810 if (obj
->type
== OBJ_COMMIT
)
811 return fsck_commit((struct commit
*) obj
, (const char *) data
,
813 if (obj
->type
== OBJ_TAG
)
814 return fsck_tag((struct tag
*) obj
, (const char *) data
,
817 return report(options
, obj
, FSCK_MSG_UNKNOWN_TYPE
, "unknown type '%d' (internal fsck error)",
821 int fsck_error_function(struct object
*obj
, int msg_type
, const char *message
)
823 if (msg_type
== FSCK_WARN
) {
824 warning("object %s: %s", oid_to_hex(&obj
->oid
), message
);
827 error("object %s: %s", oid_to_hex(&obj
->oid
), message
);