2 #include "object-store.h"
3 #include "repository.h"
17 #include "submodule-config.h"
19 #include "credential.h"
22 static struct oidset gitmodules_found
= OIDSET_INIT
;
23 static struct oidset gitmodules_done
= OIDSET_INIT
;
28 #define FOREACH_MSG_ID(FUNC) \
30 FUNC(NUL_IN_HEADER, FATAL) \
31 FUNC(UNTERMINATED_HEADER, FATAL) \
33 FUNC(BAD_DATE, ERROR) \
34 FUNC(BAD_DATE_OVERFLOW, ERROR) \
35 FUNC(BAD_EMAIL, ERROR) \
36 FUNC(BAD_NAME, ERROR) \
37 FUNC(BAD_OBJECT_SHA1, ERROR) \
38 FUNC(BAD_PARENT_SHA1, ERROR) \
39 FUNC(BAD_TAG_OBJECT, ERROR) \
40 FUNC(BAD_TIMEZONE, ERROR) \
41 FUNC(BAD_TREE, ERROR) \
42 FUNC(BAD_TREE_SHA1, ERROR) \
43 FUNC(BAD_TYPE, ERROR) \
44 FUNC(DUPLICATE_ENTRIES, ERROR) \
45 FUNC(MISSING_AUTHOR, ERROR) \
46 FUNC(MISSING_COMMITTER, ERROR) \
47 FUNC(MISSING_EMAIL, ERROR) \
48 FUNC(MISSING_GRAFT, ERROR) \
49 FUNC(MISSING_NAME_BEFORE_EMAIL, ERROR) \
50 FUNC(MISSING_OBJECT, ERROR) \
51 FUNC(MISSING_PARENT, ERROR) \
52 FUNC(MISSING_SPACE_BEFORE_DATE, ERROR) \
53 FUNC(MISSING_SPACE_BEFORE_EMAIL, ERROR) \
54 FUNC(MISSING_TAG, ERROR) \
55 FUNC(MISSING_TAG_ENTRY, ERROR) \
56 FUNC(MISSING_TAG_OBJECT, ERROR) \
57 FUNC(MISSING_TREE, ERROR) \
58 FUNC(MISSING_TREE_OBJECT, ERROR) \
59 FUNC(MISSING_TYPE, ERROR) \
60 FUNC(MISSING_TYPE_ENTRY, ERROR) \
61 FUNC(MULTIPLE_AUTHORS, ERROR) \
62 FUNC(TAG_OBJECT_NOT_TAG, ERROR) \
63 FUNC(TREE_NOT_SORTED, ERROR) \
64 FUNC(UNKNOWN_TYPE, ERROR) \
65 FUNC(ZERO_PADDED_DATE, ERROR) \
66 FUNC(GITMODULES_MISSING, ERROR) \
67 FUNC(GITMODULES_BLOB, ERROR) \
68 FUNC(GITMODULES_LARGE, ERROR) \
69 FUNC(GITMODULES_NAME, ERROR) \
70 FUNC(GITMODULES_SYMLINK, ERROR) \
71 FUNC(GITMODULES_URL, ERROR) \
72 FUNC(GITMODULES_PATH, ERROR) \
73 FUNC(GITMODULES_UPDATE, ERROR) \
75 FUNC(BAD_FILEMODE, WARN) \
76 FUNC(EMPTY_NAME, WARN) \
77 FUNC(FULL_PATHNAME, WARN) \
79 FUNC(HAS_DOTDOT, WARN) \
80 FUNC(HAS_DOTGIT, WARN) \
81 FUNC(NULL_SHA1, WARN) \
82 FUNC(ZERO_PADDED_FILEMODE, WARN) \
83 FUNC(NUL_IN_COMMIT, WARN) \
84 /* infos (reported as warnings, but ignored by default) */ \
85 FUNC(GITMODULES_PARSE, INFO) \
86 FUNC(BAD_TAG_NAME, INFO) \
87 FUNC(MISSING_TAGGER_ENTRY, INFO)
89 #define MSG_ID(id, msg_type) FSCK_MSG_##id,
91 FOREACH_MSG_ID(MSG_ID
)
97 #define MSG_ID(id, msg_type) { STR(id), NULL, NULL, FSCK_##msg_type },
99 const char *id_string
;
100 const char *downcased
;
101 const char *camelcased
;
103 } msg_id_info
[FSCK_MSG_MAX
+ 1] = {
104 FOREACH_MSG_ID(MSG_ID
)
105 { NULL
, NULL
, NULL
, -1 }
109 static void prepare_msg_ids(void)
113 if (msg_id_info
[0].downcased
)
116 /* convert id_string to lower case, without underscores. */
117 for (i
= 0; i
< FSCK_MSG_MAX
; i
++) {
118 const char *p
= msg_id_info
[i
].id_string
;
120 char *q
= xmalloc(len
);
122 msg_id_info
[i
].downcased
= q
;
127 *(q
)++ = tolower(*(p
)++);
130 p
= msg_id_info
[i
].id_string
;
132 msg_id_info
[i
].camelcased
= q
;
139 *q
++ = tolower(*p
++);
146 static int parse_msg_id(const char *text
)
152 for (i
= 0; i
< FSCK_MSG_MAX
; i
++)
153 if (!strcmp(text
, msg_id_info
[i
].downcased
))
159 void list_config_fsck_msg_ids(struct string_list
*list
, const char *prefix
)
165 for (i
= 0; i
< FSCK_MSG_MAX
; i
++)
166 list_config_item(list
, prefix
, msg_id_info
[i
].camelcased
);
169 static int fsck_msg_type(enum fsck_msg_id msg_id
,
170 struct fsck_options
*options
)
174 assert(msg_id
>= 0 && msg_id
< FSCK_MSG_MAX
);
176 if (options
->msg_type
)
177 msg_type
= options
->msg_type
[msg_id
];
179 msg_type
= msg_id_info
[msg_id
].msg_type
;
180 if (options
->strict
&& msg_type
== FSCK_WARN
)
181 msg_type
= FSCK_ERROR
;
187 static void init_skiplist(struct fsck_options
*options
, const char *path
)
190 struct strbuf sb
= STRBUF_INIT
;
191 struct object_id oid
;
193 fp
= fopen(path
, "r");
195 die("Could not open skip list: %s", path
);
196 while (!strbuf_getline(&sb
, fp
)) {
201 * Allow trailing comments, leading whitespace
202 * (including before commits), and empty or whitespace
205 hash
= strchr(sb
.buf
, '#');
207 strbuf_setlen(&sb
, hash
- sb
.buf
);
212 if (parse_oid_hex(sb
.buf
, &oid
, &p
) || *p
!= '\0')
213 die("Invalid SHA-1: %s", sb
.buf
);
214 oidset_insert(&options
->skiplist
, &oid
);
217 die_errno("Could not read '%s'", path
);
222 static int parse_msg_type(const char *str
)
224 if (!strcmp(str
, "error"))
226 else if (!strcmp(str
, "warn"))
228 else if (!strcmp(str
, "ignore"))
231 die("Unknown fsck message type: '%s'", str
);
234 int is_valid_msg_type(const char *msg_id
, const char *msg_type
)
236 if (parse_msg_id(msg_id
) < 0)
238 parse_msg_type(msg_type
);
242 void fsck_set_msg_type(struct fsck_options
*options
,
243 const char *msg_id
, const char *msg_type
)
245 int id
= parse_msg_id(msg_id
), type
;
248 die("Unhandled message id: %s", msg_id
);
249 type
= parse_msg_type(msg_type
);
251 if (type
!= FSCK_ERROR
&& msg_id_info
[id
].msg_type
== FSCK_FATAL
)
252 die("Cannot demote %s to %s", msg_id
, msg_type
);
254 if (!options
->msg_type
) {
257 ALLOC_ARRAY(msg_type
, FSCK_MSG_MAX
);
258 for (i
= 0; i
< FSCK_MSG_MAX
; i
++)
259 msg_type
[i
] = fsck_msg_type(i
, options
);
260 options
->msg_type
= msg_type
;
263 options
->msg_type
[id
] = type
;
266 void fsck_set_msg_types(struct fsck_options
*options
, const char *values
)
268 char *buf
= xstrdup(values
), *to_free
= buf
;
272 int len
= strcspn(buf
, " ,|"), equal
;
282 equal
< len
&& buf
[equal
] != '=' && buf
[equal
] != ':';
284 buf
[equal
] = tolower(buf
[equal
]);
287 if (!strcmp(buf
, "skiplist")) {
289 die("skiplist requires a path");
290 init_skiplist(options
, buf
+ equal
+ 1);
296 die("Missing '=': '%s'", buf
);
298 fsck_set_msg_type(options
, buf
, buf
+ equal
+ 1);
304 static void append_msg_id(struct strbuf
*sb
, const char *msg_id
)
307 char c
= *(msg_id
)++;
312 strbuf_addch(sb
, tolower(c
));
315 strbuf_addch(sb
, *(msg_id
)++);
319 strbuf_addstr(sb
, ": ");
322 static int object_on_skiplist(struct fsck_options
*opts
, struct object
*obj
)
324 return opts
&& obj
&& oidset_contains(&opts
->skiplist
, &obj
->oid
);
327 __attribute__((format (printf
, 4, 5)))
328 static int report(struct fsck_options
*options
, struct object
*object
,
329 enum fsck_msg_id id
, const char *fmt
, ...)
332 struct strbuf sb
= STRBUF_INIT
;
333 int msg_type
= fsck_msg_type(id
, options
), result
;
335 if (msg_type
== FSCK_IGNORE
)
338 if (object_on_skiplist(options
, object
))
341 if (msg_type
== FSCK_FATAL
)
342 msg_type
= FSCK_ERROR
;
343 else if (msg_type
== FSCK_INFO
)
344 msg_type
= FSCK_WARN
;
346 append_msg_id(&sb
, msg_id_info
[id
].id_string
);
349 strbuf_vaddf(&sb
, fmt
, ap
);
350 result
= options
->error_func(options
, object
, msg_type
, sb
.buf
);
357 static char *get_object_name(struct fsck_options
*options
, struct object
*obj
)
359 if (!options
->object_names
)
361 return lookup_decoration(options
->object_names
, obj
);
364 static void put_object_name(struct fsck_options
*options
, struct object
*obj
,
365 const char *fmt
, ...)
368 struct strbuf buf
= STRBUF_INIT
;
371 if (!options
->object_names
)
373 existing
= lookup_decoration(options
->object_names
, obj
);
377 strbuf_vaddf(&buf
, fmt
, ap
);
378 add_decoration(options
->object_names
, obj
, strbuf_detach(&buf
, NULL
));
382 static const char *describe_object(struct fsck_options
*o
, struct object
*obj
)
384 static struct strbuf buf
= STRBUF_INIT
;
388 strbuf_addstr(&buf
, oid_to_hex(&obj
->oid
));
389 if (o
->object_names
&& (name
= lookup_decoration(o
->object_names
, obj
)))
390 strbuf_addf(&buf
, " (%s)", name
);
395 static int fsck_walk_tree(struct tree
*tree
, void *data
, struct fsck_options
*options
)
397 struct tree_desc desc
;
398 struct name_entry entry
;
402 if (parse_tree(tree
))
405 name
= get_object_name(options
, &tree
->object
);
406 if (init_tree_desc_gently(&desc
, tree
->buffer
, tree
->size
))
408 while (tree_entry_gently(&desc
, &entry
)) {
412 if (S_ISGITLINK(entry
.mode
))
415 if (S_ISDIR(entry
.mode
)) {
416 obj
= (struct object
*)lookup_tree(the_repository
, entry
.oid
);
418 put_object_name(options
, obj
, "%s%s/", name
,
420 result
= options
->walk(obj
, OBJ_TREE
, data
, options
);
422 else if (S_ISREG(entry
.mode
) || S_ISLNK(entry
.mode
)) {
423 obj
= (struct object
*)lookup_blob(the_repository
, entry
.oid
);
425 put_object_name(options
, obj
, "%s%s", name
,
427 result
= options
->walk(obj
, OBJ_BLOB
, data
, options
);
430 result
= error("in tree %s: entry %s has bad mode %.6o",
431 describe_object(options
, &tree
->object
), entry
.path
, entry
.mode
);
441 static int fsck_walk_commit(struct commit
*commit
, void *data
, struct fsck_options
*options
)
443 int counter
= 0, generation
= 0, name_prefix_len
= 0;
444 struct commit_list
*parents
;
449 if (parse_commit(commit
))
452 name
= get_object_name(options
, &commit
->object
);
454 put_object_name(options
, &get_commit_tree(commit
)->object
,
457 result
= options
->walk((struct object
*)get_commit_tree(commit
),
458 OBJ_TREE
, data
, options
);
463 parents
= commit
->parents
;
464 if (name
&& parents
) {
465 int len
= strlen(name
), power
;
467 if (len
&& name
[len
- 1] == '^') {
469 name_prefix_len
= len
- 1;
471 else { /* parse ~<generation> suffix */
472 for (generation
= 0, power
= 1;
473 len
&& isdigit(name
[len
- 1]);
475 generation
+= power
* (name
[--len
] - '0');
476 if (power
> 1 && len
&& name
[len
- 1] == '~')
477 name_prefix_len
= len
- 1;
483 struct object
*obj
= &parents
->item
->object
;
486 put_object_name(options
, obj
, "%s^%d",
488 else if (generation
> 0)
489 put_object_name(options
, obj
, "%.*s~%d",
490 name_prefix_len
, name
, generation
+ 1);
492 put_object_name(options
, obj
, "%s^", name
);
494 result
= options
->walk((struct object
*)parents
->item
, OBJ_COMMIT
, data
, options
);
499 parents
= parents
->next
;
504 static int fsck_walk_tag(struct tag
*tag
, void *data
, struct fsck_options
*options
)
506 char *name
= get_object_name(options
, &tag
->object
);
511 put_object_name(options
, tag
->tagged
, "%s", name
);
512 return options
->walk(tag
->tagged
, OBJ_ANY
, data
, options
);
515 int fsck_walk(struct object
*obj
, void *data
, struct fsck_options
*options
)
520 if (obj
->type
== OBJ_NONE
)
521 parse_object(the_repository
, &obj
->oid
);
527 return fsck_walk_tree((struct tree
*)obj
, data
, options
);
529 return fsck_walk_commit((struct commit
*)obj
, data
, options
);
531 return fsck_walk_tag((struct tag
*)obj
, data
, options
);
533 error("Unknown object type for %s", describe_object(options
, obj
));
/*
 * The entries in a tree are ordered in the _path_ order,
 * which means that a directory entry is ordered by adding
 * a slash to the end of it.
 *
 * So a directory called "a" is ordered _after_ a file
 * called "a.c", because "a/" sorts after "a.c".
 */
#define TREE_UNORDERED (-1)
#define TREE_HAS_DUPS  (-2)

/*
 * Check that the entry (mode1, name1) may be directly followed by the
 * entry (mode2, name2) in a tree.
 *
 * Returns 0 when the first entry sorts strictly before the second,
 * TREE_HAS_DUPS when both names are byte-for-byte identical, and
 * TREE_UNORDERED otherwise.
 */
static int verify_ordered(unsigned mode1, const char *name1, unsigned mode2, const char *name2)
{
	/* strlen() yields size_t; keep it that way instead of narrowing to int. */
	size_t len1 = strlen(name1);
	size_t len2 = strlen(name2);
	size_t len = len1 < len2 ? len1 : len2;
	unsigned char c1, c2;
	int cmp;

	cmp = memcmp(name1, name2, len);
	if (cmp < 0)
		return 0;
	if (cmp > 0)
		return TREE_UNORDERED;

	/*
	 * Ok, the first <len> characters are the same.
	 * Now we need to order the next one, but turn
	 * a '\0' into a '/' for a directory entry.
	 */
	c1 = name1[len];
	c2 = name2[len];
	if (!c1 && !c2)
		/*
		 * git-write-tree used to write out a nonsense tree that has
		 * entries with the same name, one blob and one tree.  Make
		 * sure we do not have duplicate entries.
		 */
		return TREE_HAS_DUPS;
	if (!c1 && S_ISDIR(mode1))
		c1 = '/';
	if (!c2 && S_ISDIR(mode2))
		c2 = '/';
	return c1 < c2 ? 0 : TREE_UNORDERED;
}
584 static int fsck_tree(struct tree
*item
, struct fsck_options
*options
)
587 int has_null_sha1
= 0;
588 int has_full_path
= 0;
589 int has_empty_name
= 0;
593 int has_zero_pad
= 0;
594 int has_bad_modes
= 0;
595 int has_dup_entries
= 0;
596 int not_properly_sorted
= 0;
597 struct tree_desc desc
;
601 if (init_tree_desc_gently(&desc
, item
->buffer
, item
->size
)) {
602 retval
+= report(options
, &item
->object
, FSCK_MSG_BAD_TREE
, "cannot be parsed as a tree");
611 const char *name
, *backslash
;
612 const struct object_id
*oid
;
614 oid
= tree_entry_extract(&desc
, &name
, &mode
);
616 has_null_sha1
|= is_null_oid(oid
);
617 has_full_path
|= !!strchr(name
, '/');
618 has_empty_name
|= !*name
;
619 has_dot
|= !strcmp(name
, ".");
620 has_dotdot
|= !strcmp(name
, "..");
621 has_dotgit
|= is_hfs_dotgit(name
) || is_ntfs_dotgit(name
);
622 has_zero_pad
|= *(char *)desc
.buffer
== '0';
624 if (is_hfs_dotgitmodules(name
) || is_ntfs_dotgitmodules(name
)) {
626 oidset_insert(&gitmodules_found
, oid
);
628 retval
+= report(options
, &item
->object
,
629 FSCK_MSG_GITMODULES_SYMLINK
,
630 ".gitmodules is a symbolic link");
633 if ((backslash
= strchr(name
, '\\'))) {
636 has_dotgit
|= is_ntfs_dotgit(backslash
);
637 if (is_ntfs_dotgitmodules(backslash
)) {
639 oidset_insert(&gitmodules_found
, oid
);
641 retval
+= report(options
, &item
->object
,
642 FSCK_MSG_GITMODULES_SYMLINK
,
643 ".gitmodules is a symbolic link");
645 backslash
= strchr(backslash
, '\\');
649 if (update_tree_entry_gently(&desc
)) {
650 retval
+= report(options
, &item
->object
, FSCK_MSG_BAD_TREE
, "cannot be parsed as a tree");
665 * This is nonstandard, but we had a few of these
666 * early on when we honored the full set of mode
670 if (!options
->strict
)
678 switch (verify_ordered(o_mode
, o_name
, mode
, name
)) {
680 not_properly_sorted
= 1;
695 retval
+= report(options
, &item
->object
, FSCK_MSG_NULL_SHA1
, "contains entries pointing to null sha1");
697 retval
+= report(options
, &item
->object
, FSCK_MSG_FULL_PATHNAME
, "contains full pathnames");
699 retval
+= report(options
, &item
->object
, FSCK_MSG_EMPTY_NAME
, "contains empty pathname");
701 retval
+= report(options
, &item
->object
, FSCK_MSG_HAS_DOT
, "contains '.'");
703 retval
+= report(options
, &item
->object
, FSCK_MSG_HAS_DOTDOT
, "contains '..'");
705 retval
+= report(options
, &item
->object
, FSCK_MSG_HAS_DOTGIT
, "contains '.git'");
707 retval
+= report(options
, &item
->object
, FSCK_MSG_ZERO_PADDED_FILEMODE
, "contains zero-padded file modes");
709 retval
+= report(options
, &item
->object
, FSCK_MSG_BAD_FILEMODE
, "contains bad file modes");
711 retval
+= report(options
, &item
->object
, FSCK_MSG_DUPLICATE_ENTRIES
, "contains duplicate file entries");
712 if (not_properly_sorted
)
713 retval
+= report(options
, &item
->object
, FSCK_MSG_TREE_NOT_SORTED
, "not properly sorted");
717 static int verify_headers(const void *data
, unsigned long size
,
718 struct object
*obj
, struct fsck_options
*options
)
720 const char *buffer
= (const char *)data
;
723 for (i
= 0; i
< size
; i
++) {
726 return report(options
, obj
,
727 FSCK_MSG_NUL_IN_HEADER
,
728 "unterminated header: NUL at offset %ld", i
);
730 if (i
+ 1 < size
&& buffer
[i
+ 1] == '\n')
736 * We did not find double-LF that separates the header
737 * and the body. Not having a body is not a crime but
738 * we do want to see the terminating LF for the last header
741 if (size
&& buffer
[size
- 1] == '\n')
744 return report(options
, obj
,
745 FSCK_MSG_UNTERMINATED_HEADER
, "unterminated header");
748 static int fsck_ident(const char **ident
, struct object
*obj
, struct fsck_options
*options
)
750 const char *p
= *ident
;
753 *ident
= strchrnul(*ident
, '\n');
758 return report(options
, obj
, FSCK_MSG_MISSING_NAME_BEFORE_EMAIL
, "invalid author/committer line - missing space before email");
759 p
+= strcspn(p
, "<>\n");
761 return report(options
, obj
, FSCK_MSG_BAD_NAME
, "invalid author/committer line - bad name");
763 return report(options
, obj
, FSCK_MSG_MISSING_EMAIL
, "invalid author/committer line - missing email");
765 return report(options
, obj
, FSCK_MSG_MISSING_SPACE_BEFORE_EMAIL
, "invalid author/committer line - missing space before email");
767 p
+= strcspn(p
, "<>\n");
769 return report(options
, obj
, FSCK_MSG_BAD_EMAIL
, "invalid author/committer line - bad email");
772 return report(options
, obj
, FSCK_MSG_MISSING_SPACE_BEFORE_DATE
, "invalid author/committer line - missing space before date");
774 if (*p
== '0' && p
[1] != ' ')
775 return report(options
, obj
, FSCK_MSG_ZERO_PADDED_DATE
, "invalid author/committer line - zero-padded date");
776 if (date_overflows(parse_timestamp(p
, &end
, 10)))
777 return report(options
, obj
, FSCK_MSG_BAD_DATE_OVERFLOW
, "invalid author/committer line - date causes integer overflow");
778 if ((end
== p
|| *end
!= ' '))
779 return report(options
, obj
, FSCK_MSG_BAD_DATE
, "invalid author/committer line - bad date");
781 if ((*p
!= '+' && *p
!= '-') ||
787 return report(options
, obj
, FSCK_MSG_BAD_TIMEZONE
, "invalid author/committer line - bad time zone");
792 static int fsck_commit_buffer(struct commit
*commit
, const char *buffer
,
793 unsigned long size
, struct fsck_options
*options
)
795 struct object_id tree_oid
, oid
;
796 struct commit_graft
*graft
;
797 unsigned parent_count
, parent_line_count
= 0, author_count
;
799 const char *buffer_begin
= buffer
;
802 if (verify_headers(buffer
, size
, &commit
->object
, options
))
805 if (!skip_prefix(buffer
, "tree ", &buffer
))
806 return report(options
, &commit
->object
, FSCK_MSG_MISSING_TREE
, "invalid format - expected 'tree' line");
807 if (parse_oid_hex(buffer
, &tree_oid
, &p
) || *p
!= '\n') {
808 err
= report(options
, &commit
->object
, FSCK_MSG_BAD_TREE_SHA1
, "invalid 'tree' line format - bad sha1");
813 while (skip_prefix(buffer
, "parent ", &buffer
)) {
814 if (parse_oid_hex(buffer
, &oid
, &p
) || *p
!= '\n') {
815 err
= report(options
, &commit
->object
, FSCK_MSG_BAD_PARENT_SHA1
, "invalid 'parent' line format - bad sha1");
822 graft
= lookup_commit_graft(the_repository
, &commit
->object
.oid
);
823 parent_count
= commit_list_count(commit
->parents
);
825 if (graft
->nr_parent
== -1 && !parent_count
)
826 ; /* shallow commit */
827 else if (graft
->nr_parent
!= parent_count
) {
828 err
= report(options
, &commit
->object
, FSCK_MSG_MISSING_GRAFT
, "graft objects missing");
833 if (parent_count
!= parent_line_count
) {
834 err
= report(options
, &commit
->object
, FSCK_MSG_MISSING_PARENT
, "parent objects missing");
840 while (skip_prefix(buffer
, "author ", &buffer
)) {
842 err
= fsck_ident(&buffer
, &commit
->object
, options
);
846 if (author_count
< 1)
847 err
= report(options
, &commit
->object
, FSCK_MSG_MISSING_AUTHOR
, "invalid format - expected 'author' line");
848 else if (author_count
> 1)
849 err
= report(options
, &commit
->object
, FSCK_MSG_MULTIPLE_AUTHORS
, "invalid format - multiple 'author' lines");
852 if (!skip_prefix(buffer
, "committer ", &buffer
))
853 return report(options
, &commit
->object
, FSCK_MSG_MISSING_COMMITTER
, "invalid format - expected 'committer' line");
854 err
= fsck_ident(&buffer
, &commit
->object
, options
);
857 if (!get_commit_tree(commit
)) {
858 err
= report(options
, &commit
->object
, FSCK_MSG_BAD_TREE
, "could not load commit's tree %s", oid_to_hex(&tree_oid
));
862 if (memchr(buffer_begin
, '\0', size
)) {
863 err
= report(options
, &commit
->object
, FSCK_MSG_NUL_IN_COMMIT
,
864 "NUL byte in the commit object body");
871 static int fsck_commit(struct commit
*commit
, const char *data
,
872 unsigned long size
, struct fsck_options
*options
)
874 const char *buffer
= data
? data
: get_commit_buffer(commit
, &size
);
875 int ret
= fsck_commit_buffer(commit
, buffer
, size
, options
);
877 unuse_commit_buffer(commit
, buffer
);
881 static int fsck_tag_buffer(struct tag
*tag
, const char *data
,
882 unsigned long size
, struct fsck_options
*options
)
884 struct object_id oid
;
887 char *to_free
= NULL
, *eol
;
888 struct strbuf sb
= STRBUF_INIT
;
894 enum object_type type
;
897 read_object_file(&tag
->object
.oid
, &type
, &size
);
899 return report(options
, &tag
->object
,
900 FSCK_MSG_MISSING_TAG_OBJECT
,
901 "cannot read tag object");
903 if (type
!= OBJ_TAG
) {
904 ret
= report(options
, &tag
->object
,
905 FSCK_MSG_TAG_OBJECT_NOT_TAG
,
906 "expected tag got %s",
912 ret
= verify_headers(buffer
, size
, &tag
->object
, options
);
916 if (!skip_prefix(buffer
, "object ", &buffer
)) {
917 ret
= report(options
, &tag
->object
, FSCK_MSG_MISSING_OBJECT
, "invalid format - expected 'object' line");
920 if (parse_oid_hex(buffer
, &oid
, &p
) || *p
!= '\n') {
921 ret
= report(options
, &tag
->object
, FSCK_MSG_BAD_OBJECT_SHA1
, "invalid 'object' line format - bad sha1");
927 if (!skip_prefix(buffer
, "type ", &buffer
)) {
928 ret
= report(options
, &tag
->object
, FSCK_MSG_MISSING_TYPE_ENTRY
, "invalid format - expected 'type' line");
931 eol
= strchr(buffer
, '\n');
933 ret
= report(options
, &tag
->object
, FSCK_MSG_MISSING_TYPE
, "invalid format - unexpected end after 'type' line");
936 if (type_from_string_gently(buffer
, eol
- buffer
, 1) < 0)
937 ret
= report(options
, &tag
->object
, FSCK_MSG_BAD_TYPE
, "invalid 'type' value");
942 if (!skip_prefix(buffer
, "tag ", &buffer
)) {
943 ret
= report(options
, &tag
->object
, FSCK_MSG_MISSING_TAG_ENTRY
, "invalid format - expected 'tag' line");
946 eol
= strchr(buffer
, '\n');
948 ret
= report(options
, &tag
->object
, FSCK_MSG_MISSING_TAG
, "invalid format - unexpected end after 'type' line");
951 strbuf_addf(&sb
, "refs/tags/%.*s", (int)(eol
- buffer
), buffer
);
952 if (check_refname_format(sb
.buf
, 0)) {
953 ret
= report(options
, &tag
->object
, FSCK_MSG_BAD_TAG_NAME
,
954 "invalid 'tag' name: %.*s",
955 (int)(eol
- buffer
), buffer
);
961 if (!skip_prefix(buffer
, "tagger ", &buffer
)) {
962 /* early tags do not contain 'tagger' lines; warn only */
963 ret
= report(options
, &tag
->object
, FSCK_MSG_MISSING_TAGGER_ENTRY
, "invalid format - expected 'tagger' line");
968 ret
= fsck_ident(&buffer
, &tag
->object
, options
);
976 static int fsck_tag(struct tag
*tag
, const char *data
,
977 unsigned long size
, struct fsck_options
*options
)
979 struct object
*tagged
= tag
->tagged
;
982 return report(options
, &tag
->object
, FSCK_MSG_BAD_TAG_OBJECT
, "could not load tagged object");
984 return fsck_tag_buffer(tag
, data
, size
, options
);
/*
 * Like builtin/submodule--helper.c's starts_with_dot_slash, but without
 * relying on the platform-dependent is_dir_sep helper.
 *
 * This is for use in checking whether a submodule URL is interpreted as
 * relative to the current directory on any platform, since \ is a
 * directory separator on Windows but not on other platforms.
 */
static int starts_with_dot_slash(const char *str)
{
	return str[0] == '.' && (str[1] == '/' || str[1] == '\\');
}

/*
 * Like starts_with_dot_slash, this is a variant of submodule--helper's
 * helper of the same name with the twist that it accepts backslash as a
 * directory separator even on non-Windows platforms.
 */
static int starts_with_dot_dot_slash(const char *str)
{
	return str[0] == '.' && starts_with_dot_slash(str + 1);
}

/* A submodule URL is relative when it begins with "./" or "../". */
static int submodule_url_is_relative(const char *url)
{
	return starts_with_dot_slash(url) || starts_with_dot_dot_slash(url);
}

/*
 * Count directory components that a relative submodule URL should chop
 * from the remote_url it is to be resolved against.
 *
 * In other words, this counts the "../" components at the start of
 * `url`; leading "./" components are skipped without being counted.
 *
 * Returns the number of directory components to chop and writes a
 * pointer to the next character of url after all leading "./" and
 * "../" components to out.
 */
static int count_leading_dotdots(const char *url, const char **out)
{
	int result = 0;

	while (1) {
		if (starts_with_dot_dot_slash(url)) {
			result++;
			url += strlen("../");
			continue;
		}
		if (starts_with_dot_slash(url)) {
			url += strlen("./");
			continue;
		}
		*out = url;
		return result;
	}
}
/*
 * Check whether a transport is implemented by git-remote-curl.
 *
 * If it is, returns 1 and writes the URL that would be passed to
 * git-remote-curl to the "out" parameter.
 *
 * Otherwise, returns 0 and leaves "out" untouched.
 *
 * Examples:
 *   http::https://example.com/repo.git -> 1, https://example.com/repo.git
 *   https://example.com/repo.git -> 1, https://example.com/repo.git
 *   git://example.com/repo.git -> 0
 *
 * This is for use in checking for previously exploitable bugs that
 * required a submodule URL to be passed to git-remote-curl.
 */
static int url_to_curl_url(const char *url, const char **out)
{
	/*
	 * We don't need to check for case-aliases, "http.exe", and so
	 * on because in the default configuration, is_transport_allowed
	 * prevents URLs with those schemes from being cloned.
	 */

	/* "<helper>::<address>" form: hand over what follows the "::". */
	if (skip_prefix(url, "http::", out) ||
	    skip_prefix(url, "https::", out) ||
	    skip_prefix(url, "ftp::", out) ||
	    skip_prefix(url, "ftps::", out))
		return 1;

	/* Plain scheme form: the URL is passed through unchanged. */
	if (starts_with(url, "http://") ||
	    starts_with(url, "https://") ||
	    starts_with(url, "ftp://") ||
	    starts_with(url, "ftps://")) {
		*out = url;
		return 1;
	}
	return 0;
}
1082 static int check_submodule_url(const char *url
)
1084 const char *curl_url
;
1086 if (looks_like_command_line_option(url
))
1089 if (submodule_url_is_relative(url
)) {
1095 * This could be appended to an http URL and url-decoded;
1096 * check for malicious characters.
1098 decoded
= url_decode(url
);
1099 has_nl
= !!strchr(decoded
, '\n');
1106 * URLs which escape their root via "../" can overwrite
1107 * the host field and previous components, resolving to
1108 * URLs like https::example.com/submodule.git and
1109 * https:///example.com/submodule.git that were
1110 * susceptible to CVE-2020-11008.
1112 if (count_leading_dotdots(url
, &next
) > 0 &&
1113 (*next
== ':' || *next
== '/'))
1117 else if (url_to_curl_url(url
, &curl_url
)) {
1118 struct credential c
= CREDENTIAL_INIT
;
1120 if (credential_from_url_gently(&c
, curl_url
, 1) ||
1123 credential_clear(&c
);
1130 struct fsck_gitmodules_data
{
1132 struct fsck_options
*options
;
1136 static int fsck_gitmodules_fn(const char *var
, const char *value
, void *vdata
)
1138 struct fsck_gitmodules_data
*data
= vdata
;
1139 const char *subsection
, *key
;
1143 if (parse_config_key(var
, "submodule", &subsection
, &subsection_len
, &key
) < 0 ||
1147 name
= xmemdupz(subsection
, subsection_len
);
1148 if (check_submodule_name(name
) < 0)
1149 data
->ret
|= report(data
->options
, data
->obj
,
1150 FSCK_MSG_GITMODULES_NAME
,
1151 "disallowed submodule name: %s",
1153 if (!strcmp(key
, "url") && value
&&
1154 check_submodule_url(value
) < 0)
1155 data
->ret
|= report(data
->options
, data
->obj
,
1156 FSCK_MSG_GITMODULES_URL
,
1157 "disallowed submodule url: %s",
1159 if (!strcmp(key
, "path") && value
&&
1160 looks_like_command_line_option(value
))
1161 data
->ret
|= report(data
->options
, data
->obj
,
1162 FSCK_MSG_GITMODULES_PATH
,
1163 "disallowed submodule path: %s",
1165 if (!strcmp(key
, "update") && value
&&
1166 parse_submodule_update_type(value
) == SM_UPDATE_COMMAND
)
1167 data
->ret
|= report(data
->options
, data
->obj
,
1168 FSCK_MSG_GITMODULES_UPDATE
,
1169 "disallowed submodule update setting: %s",
1176 static int fsck_blob(struct blob
*blob
, const char *buf
,
1177 unsigned long size
, struct fsck_options
*options
)
1179 struct fsck_gitmodules_data data
;
1180 struct config_options config_opts
= { 0 };
1182 if (!oidset_contains(&gitmodules_found
, &blob
->object
.oid
))
1184 oidset_insert(&gitmodules_done
, &blob
->object
.oid
);
1186 if (object_on_skiplist(options
, &blob
->object
))
1191 * A missing buffer here is a sign that the caller found the
1192 * blob too gigantic to load into memory. Let's just consider
1195 return report(options
, &blob
->object
,
1196 FSCK_MSG_GITMODULES_LARGE
,
1197 ".gitmodules too large to parse");
1200 data
.obj
= &blob
->object
;
1201 data
.options
= options
;
1203 config_opts
.error_action
= CONFIG_ERROR_SILENT
;
1204 if (git_config_from_mem(fsck_gitmodules_fn
, CONFIG_ORIGIN_BLOB
,
1205 ".gitmodules", buf
, size
, &data
, &config_opts
))
1206 data
.ret
|= report(options
, &blob
->object
,
1207 FSCK_MSG_GITMODULES_PARSE
,
1208 "could not parse gitmodules blob");
1213 int fsck_object(struct object
*obj
, void *data
, unsigned long size
,
1214 struct fsck_options
*options
)
1217 return report(options
, obj
, FSCK_MSG_BAD_OBJECT_SHA1
, "no valid object to fsck");
1219 if (obj
->type
== OBJ_BLOB
)
1220 return fsck_blob((struct blob
*)obj
, data
, size
, options
);
1221 if (obj
->type
== OBJ_TREE
)
1222 return fsck_tree((struct tree
*) obj
, options
);
1223 if (obj
->type
== OBJ_COMMIT
)
1224 return fsck_commit((struct commit
*) obj
, (const char *) data
,
1226 if (obj
->type
== OBJ_TAG
)
1227 return fsck_tag((struct tag
*) obj
, (const char *) data
,
1230 return report(options
, obj
, FSCK_MSG_UNKNOWN_TYPE
, "unknown type '%d' (internal fsck error)",
1234 int fsck_error_function(struct fsck_options
*o
,
1235 struct object
*obj
, int msg_type
, const char *message
)
1237 if (msg_type
== FSCK_WARN
) {
1238 warning("object %s: %s", describe_object(o
, obj
), message
);
1241 error("object %s: %s", describe_object(o
, obj
), message
);
1245 int fsck_finish(struct fsck_options
*options
)
1248 struct oidset_iter iter
;
1249 const struct object_id
*oid
;
1251 oidset_iter_init(&gitmodules_found
, &iter
);
1252 while ((oid
= oidset_iter_next(&iter
))) {
1254 enum object_type type
;
1258 if (oidset_contains(&gitmodules_done
, oid
))
1261 blob
= lookup_blob(the_repository
, oid
);
1263 struct object
*obj
= lookup_unknown_object(oid
->hash
);
1264 ret
|= report(options
, obj
,
1265 FSCK_MSG_GITMODULES_BLOB
,
1266 "non-blob found at .gitmodules");
1270 buf
= read_object_file(oid
, &type
, &size
);
1272 if (is_promisor_object(&blob
->object
.oid
))
1274 ret
|= report(options
, &blob
->object
,
1275 FSCK_MSG_GITMODULES_MISSING
,
1276 "unable to read .gitmodules blob");
1280 if (type
== OBJ_BLOB
)
1281 ret
|= fsck_blob(blob
, buf
, size
, options
);
1283 ret
|= report(options
, &blob
->object
,
1284 FSCK_MSG_GITMODULES_BLOB
,
1285 "non-blob found at .gitmodules");
1290 oidset_clear(&gitmodules_found
);
1291 oidset_clear(&gitmodules_done
);