fsck.c

   1 #include "git-compat-util.h"
   2 #include "alloc.h"
   3 #include "date.h"
   4 #include "hex.h"
   5 #include "object-store.h"
   6 #include "repository.h"
   7 #include "object.h"
   8 #include "attr.h"
   9 #include "blob.h"
  10 #include "tree.h"
  11 #include "tree-walk.h"
  12 #include "commit.h"
  13 #include "tag.h"
  14 #include "fsck.h"
  15 #include "refs.h"
  16 #include "url.h"
  17 #include "utf8.h"
  18 #include "decorate.h"
  19 #include "oidset.h"
  20 #include "packfile.h"
  21 #include "submodule-config.h"
  22 #include "config.h"
  23 #include "credential.h"
  24 #include "help.h"
  25
  26 #define STR(x) #x
  27 #define MSG_ID(id, msg_type) { STR(id), NULL, NULL, FSCK_##msg_type },
  28 static struct {
  29         const char *id_string;
  30         const char *downcased;
  31         const char *camelcased;
  32         enum fsck_msg_type msg_type;
  33 } msg_id_info[FSCK_MSG_MAX + 1] = {
  34         FOREACH_FSCK_MSG_ID(MSG_ID)
  35         { NULL, NULL, NULL, -1 }
  36 };
  37 #undef MSG_ID
  38 #undef STR
  39
  40 static void prepare_msg_ids(void)
  41 {
  42         int i;
  43
  44         if (msg_id_info[0].downcased)
  45                 return;
  46
  47         /* convert id_string to lower case, without underscores. */
  48         for (i = 0; i < FSCK_MSG_MAX; i++) {
  49                 const char *p = msg_id_info[i].id_string;
  50                 int len = strlen(p);
  51                 char *q = xmalloc(len);
  52
  53                 msg_id_info[i].downcased = q;
  54                 while (*p)
  55                         if (*p == '_')
  56                                 p++;
  57                         else
  58                                 *(q)++ = tolower(*(p)++);
  59                 *q = '\0';
  60
  61                 p = msg_id_info[i].id_string;
  62                 q = xmalloc(len);
  63                 msg_id_info[i].camelcased = q;
  64                 while (*p) {
  65                         if (*p == '_') {
  66                                 p++;
  67                                 if (*p)
  68                                         *q++ = *p++;
  69                         } else {
  70                                 *q++ = tolower(*p++);
  71                         }
  72                 }
  73                 *q = '\0';
  74         }
  75 }
  76
  77 static int parse_msg_id(const char *text)
  78 {
  79         int i;
  80
  81         prepare_msg_ids();
  82
  83         for (i = 0; i < FSCK_MSG_MAX; i++)
  84                 if (!strcmp(text, msg_id_info[i].downcased))
  85                         return i;
  86
  87         return -1;
  88 }
  89
  90 void list_config_fsck_msg_ids(struct string_list *list, const char *prefix)
  91 {
  92         int i;
  93
  94         prepare_msg_ids();
  95
  96         for (i = 0; i < FSCK_MSG_MAX; i++)
  97                 list_config_item(list, prefix, msg_id_info[i].camelcased);
  98 }
  99
 100 static enum fsck_msg_type fsck_msg_type(enum fsck_msg_id msg_id,
 101         struct fsck_options *options)
 102 {
 103         assert(msg_id >= 0 && msg_id < FSCK_MSG_MAX);
 104
 105         if (!options->msg_type) {
 106                 enum fsck_msg_type msg_type = msg_id_info[msg_id].msg_type;
 107
 108                 if (options->strict && msg_type == FSCK_WARN)
 109                         msg_type = FSCK_ERROR;
 110                 return msg_type;
 111         }
 112
 113         return options->msg_type[msg_id];
 114 }
 115
 116 static enum fsck_msg_type parse_msg_type(const char *str)
 117 {
 118         if (!strcmp(str, "error"))
 119                 return FSCK_ERROR;
 120         else if (!strcmp(str, "warn"))
 121                 return FSCK_WARN;
 122         else if (!strcmp(str, "ignore"))
 123                 return FSCK_IGNORE;
 124         else
 125                 die("Unknown fsck message type: '%s'", str);
 126 }
 127
 128 int is_valid_msg_type(const char *msg_id, const char *msg_type)
 129 {
 130         if (parse_msg_id(msg_id) < 0)
 131                 return 0;
 132         parse_msg_type(msg_type);
 133         return 1;
 134 }
 135
 136 void fsck_set_msg_type_from_ids(struct fsck_options *options,
 137                                 enum fsck_msg_id msg_id,
 138                                 enum fsck_msg_type msg_type)
 139 {
 140         if (!options->msg_type) {
 141                 int i;
 142                 enum fsck_msg_type *severity;
 143                 ALLOC_ARRAY(severity, FSCK_MSG_MAX);
 144                 for (i = 0; i < FSCK_MSG_MAX; i++)
 145                         severity[i] = fsck_msg_type(i, options);
 146                 options->msg_type = severity;
 147         }
 148
 149         options->msg_type[msg_id] = msg_type;
 150 }
 151
 152 void fsck_set_msg_type(struct fsck_options *options,
 153                        const char *msg_id_str, const char *msg_type_str)
 154 {
 155         int msg_id = parse_msg_id(msg_id_str);
 156         enum fsck_msg_type msg_type = parse_msg_type(msg_type_str);
 157
 158         if (msg_id < 0)
 159                 die("Unhandled message id: %s", msg_id_str);
 160
 161         if (msg_type != FSCK_ERROR && msg_id_info[msg_id].msg_type == FSCK_FATAL)
 162                 die("Cannot demote %s to %s", msg_id_str, msg_type_str);
 163
 164         fsck_set_msg_type_from_ids(options, msg_id, msg_type);
 165 }
 166
 167 void fsck_set_msg_types(struct fsck_options *options, const char *values)
 168 {
 169         char *buf = xstrdup(values), *to_free = buf;
 170         int done = 0;
 171
 172         while (!done) {
 173                 int len = strcspn(buf, " ,|"), equal;
 174
 175                 done = !buf[len];
 176                 if (!len) {
 177                         buf++;
 178                         continue;
 179                 }
 180                 buf[len] = '\0';
 181
 182                 for (equal = 0;
 183                      equal < len && buf[equal] != '=' && buf[equal] != ':';
 184                      equal++)
 185                         buf[equal] = tolower(buf[equal]);
 186                 buf[equal] = '\0';
 187
 188                 if (!strcmp(buf, "skiplist")) {
 189                         if (equal == len)
 190                                 die("skiplist requires a path");
 191                         oidset_parse_file(&options->skiplist, buf + equal + 1);
 192                         buf += len + 1;
 193                         continue;
 194                 }
 195
 196                 if (equal == len)
 197                         die("Missing '=': '%s'", buf);
 198
 199                 fsck_set_msg_type(options, buf, buf + equal + 1);
 200                 buf += len + 1;
 201         }
 202         free(to_free);
 203 }
 204
 205 static int object_on_skiplist(struct fsck_options *opts,
 206                               const struct object_id *oid)
 207 {
 208         return opts && oid && oidset_contains(&opts->skiplist, oid);
 209 }
 210
 211 __attribute__((format (printf, 5, 6)))
 212 static int report(struct fsck_options *options,
 213                   const struct object_id *oid, enum object_type object_type,
 214                   enum fsck_msg_id msg_id, const char *fmt, ...)
 215 {
 216         va_list ap;
 217         struct strbuf sb = STRBUF_INIT;
 218         enum fsck_msg_type msg_type = fsck_msg_type(msg_id, options);
 219         int result;
 220
 221         if (msg_type == FSCK_IGNORE)
 222                 return 0;
 223
 224         if (object_on_skiplist(options, oid))
 225                 return 0;
 226
 227         if (msg_type == FSCK_FATAL)
 228                 msg_type = FSCK_ERROR;
 229         else if (msg_type == FSCK_INFO)
 230                 msg_type = FSCK_WARN;
 231
 232         prepare_msg_ids();
 233         strbuf_addf(&sb, "%s: ", msg_id_info[msg_id].camelcased);
 234
 235         va_start(ap, fmt);
 236         strbuf_vaddf(&sb, fmt, ap);
 237         result = options->error_func(options, oid, object_type,
 238                                      msg_type, msg_id, sb.buf);
 239         strbuf_release(&sb);
 240         va_end(ap);
 241
 242         return result;
 243 }
 244
 245 void fsck_enable_object_names(struct fsck_options *options)
 246 {
 247         if (!options->object_names)
 248                 options->object_names = kh_init_oid_map();
 249 }
 250
 251 const char *fsck_get_object_name(struct fsck_options *options,
 252                                  const struct object_id *oid)
 253 {
 254         khiter_t pos;
 255         if (!options->object_names)
 256                 return NULL;
 257         pos = kh_get_oid_map(options->object_names, *oid);
 258         if (pos >= kh_end(options->object_names))
 259                 return NULL;
 260         return kh_value(options->object_names, pos);
 261 }
 262
 263 void fsck_put_object_name(struct fsck_options *options,
 264                           const struct object_id *oid,
 265                           const char *fmt, ...)
 266 {
 267         va_list ap;
 268         struct strbuf buf = STRBUF_INIT;
 269         khiter_t pos;
 270         int hashret;
 271
 272         if (!options->object_names)
 273                 return;
 274
 275         pos = kh_put_oid_map(options->object_names, *oid, &hashret);
 276         if (!hashret)
 277                 return;
 278         va_start(ap, fmt);
 279         strbuf_vaddf(&buf, fmt, ap);
 280         kh_value(options->object_names, pos) = strbuf_detach(&buf, NULL);
 281         va_end(ap);
 282 }
 283
 284 const char *fsck_describe_object(struct fsck_options *options,
 285                                  const struct object_id *oid)
 286 {
 287         static struct strbuf bufs[] = {
 288                 STRBUF_INIT, STRBUF_INIT, STRBUF_INIT, STRBUF_INIT
 289         };
 290         static int b = 0;
 291         struct strbuf *buf;
 292         const char *name = fsck_get_object_name(options, oid);
 293
 294         buf = bufs + b;
 295         b = (b + 1) % ARRAY_SIZE(bufs);
 296         strbuf_reset(buf);
 297         strbuf_addstr(buf, oid_to_hex(oid));
 298         if (name)
 299                 strbuf_addf(buf, " (%s)", name);
 300
 301         return buf->buf;
 302 }
 303
 304 static int fsck_walk_tree(struct tree *tree, void *data, struct fsck_options *options)
 305 {
 306         struct tree_desc desc;
 307         struct name_entry entry;
 308         int res = 0;
 309         const char *name;
 310
 311         if (parse_tree(tree))
 312                 return -1;
 313
 314         name = fsck_get_object_name(options, &tree->object.oid);
 315         if (init_tree_desc_gently(&desc, tree->buffer, tree->size, 0))
 316                 return -1;
 317         while (tree_entry_gently(&desc, &entry)) {
 318                 struct object *obj;
 319                 int result;
 320
 321                 if (S_ISGITLINK(entry.mode))
 322                         continue;
 323
 324                 if (S_ISDIR(entry.mode)) {
 325                         obj = (struct object *)lookup_tree(the_repository, &entry.oid);
 326                         if (name && obj)
 327                                 fsck_put_object_name(options, &entry.oid, "%s%s/",
 328                                                      name, entry.path);
 329                         result = options->walk(obj, OBJ_TREE, data, options);
 330                 }
 331                 else if (S_ISREG(entry.mode) || S_ISLNK(entry.mode)) {
 332                         obj = (struct object *)lookup_blob(the_repository, &entry.oid);
 333                         if (name && obj)
 334                                 fsck_put_object_name(options, &entry.oid, "%s%s",
 335                                                      name, entry.path);
 336                         result = options->walk(obj, OBJ_BLOB, data, options);
 337                 }
 338                 else {
 339                         result = error("in tree %s: entry %s has bad mode %.6o",
 340                                        fsck_describe_object(options, &tree->object.oid),
 341                                        entry.path, entry.mode);
 342                 }
 343                 if (result < 0)
 344                         return result;
 345                 if (!res)
 346                         res = result;
 347         }
 348         return res;
 349 }
 350
 351 static int fsck_walk_commit(struct commit *commit, void *data, struct fsck_options *options)
 352 {
 353         int counter = 0, generation = 0, name_prefix_len = 0;
 354         struct commit_list *parents;
 355         int res;
 356         int result;
 357         const char *name;
 358
 359         if (repo_parse_commit(the_repository, commit))
 360                 return -1;
 361
 362         name = fsck_get_object_name(options, &commit->object.oid);
 363         if (name)
 364                 fsck_put_object_name(options, get_commit_tree_oid(commit),
 365                                      "%s:", name);
 366
 367         result = options->walk((struct object *) repo_get_commit_tree(the_repository, commit),
 368                                OBJ_TREE, data, options);
 369         if (result < 0)
 370                 return result;
 371         res = result;
 372
 373         parents = commit->parents;
 374         if (name && parents) {
 375                 int len = strlen(name), power;
 376
 377                 if (len && name[len - 1] == '^') {
 378                         generation = 1;
 379                         name_prefix_len = len - 1;
 380                 }
 381                 else { /* parse ~<generation> suffix */
 382                         for (generation = 0, power = 1;
 383                              len && isdigit(name[len - 1]);
 384                              power *= 10)
 385                                 generation += power * (name[--len] - '0');
 386                         if (power > 1 && len && name[len - 1] == '~')
 387                                 name_prefix_len = len - 1;
 388                         else {
 389                                 /* Maybe a non-first parent, e.g. HEAD^2 */
 390                                 generation = 0;
 391                                 name_prefix_len = len;
 392                         }
 393                 }
 394         }
 395
 396         while (parents) {
 397                 if (name) {
 398                         struct object_id *oid = &parents->item->object.oid;
 399
 400                         if (counter++)
 401                                 fsck_put_object_name(options, oid, "%s^%d",
 402                                                      name, counter);
 403                         else if (generation > 0)
 404                                 fsck_put_object_name(options, oid, "%.*s~%d",
 405                                                      name_prefix_len, name,
 406                                                      generation + 1);
 407                         else
 408                                 fsck_put_object_name(options, oid, "%s^", name);
 409                 }
 410                 result = options->walk((struct object *)parents->item, OBJ_COMMIT, data, options);
 411                 if (result < 0)
 412                         return result;
 413                 if (!res)
 414                         res = result;
 415                 parents = parents->next;
 416         }
 417         return res;
 418 }
 419
 420 static int fsck_walk_tag(struct tag *tag, void *data, struct fsck_options *options)
 421 {
 422         const char *name = fsck_get_object_name(options, &tag->object.oid);
 423
 424         if (parse_tag(tag))
 425                 return -1;
 426         if (name)
 427                 fsck_put_object_name(options, &tag->tagged->oid, "%s", name);
 428         return options->walk(tag->tagged, OBJ_ANY, data, options);
 429 }
 430
 431 int fsck_walk(struct object *obj, void *data, struct fsck_options *options)
 432 {
 433         if (!obj)
 434                 return -1;
 435
 436         if (obj->type == OBJ_NONE)
 437                 parse_object(the_repository, &obj->oid);
 438
 439         switch (obj->type) {
 440         case OBJ_BLOB:
 441                 return 0;
 442         case OBJ_TREE:
 443                 return fsck_walk_tree((struct tree *)obj, data, options);
 444         case OBJ_COMMIT:
 445                 return fsck_walk_commit((struct commit *)obj, data, options);
 446         case OBJ_TAG:
 447                 return fsck_walk_tag((struct tag *)obj, data, options);
 448         default:
 449                 error("Unknown object type for %s",
 450                       fsck_describe_object(options, &obj->oid));
 451                 return -1;
 452         }
 453 }
 454
 455 struct name_stack {
 456         const char **names;
 457         size_t nr, alloc;
 458 };
 459
 460 static void name_stack_push(struct name_stack *stack, const char *name)
 461 {
 462         ALLOC_GROW(stack->names, stack->nr + 1, stack->alloc);
 463         stack->names[stack->nr++] = name;
 464 }
 465
 466 static const char *name_stack_pop(struct name_stack *stack)
 467 {
 468         return stack->nr ? stack->names[--stack->nr] : NULL;
 469 }
 470
 471 static void name_stack_clear(struct name_stack *stack)
 472 {
 473         FREE_AND_NULL(stack->names);
 474         stack->nr = stack->alloc = 0;
 475 }
 476
 477 /*
 478  * The entries in a tree are ordered in the _path_ order,
 479  * which means that a directory entry is ordered by adding
 480  * a slash to the end of it.
 481  *
 482  * So a directory called "a" is ordered _after_ a file
 483  * called "a.c", because "a/" sorts after "a.c".
 484  */
 485 #define TREE_UNORDERED (-1)
 486 #define TREE_HAS_DUPS  (-2)
 487
 488 static int is_less_than_slash(unsigned char c)
 489 {
 490         return '\0' < c && c < '/';
 491 }
 492
 493 static int verify_ordered(unsigned mode1, const char *name1,
 494                           unsigned mode2, const char *name2,
 495                           struct name_stack *candidates)
 496 {
 497         int len1 = strlen(name1);
 498         int len2 = strlen(name2);
 499         int len = len1 < len2 ? len1 : len2;
 500         unsigned char c1, c2;
 501         int cmp;
 502
 503         cmp = memcmp(name1, name2, len);
 504         if (cmp < 0)
 505                 return 0;
 506         if (cmp > 0)
 507                 return TREE_UNORDERED;
 508
 509         /*
 510          * Ok, the first <len> characters are the same.
 511          * Now we need to order the next one, but turn
 512          * a '\0' into a '/' for a directory entry.
 513          */
 514         c1 = name1[len];
 515         c2 = name2[len];
 516         if (!c1 && !c2)
 517                 /*
 518                  * git-write-tree used to write out a nonsense tree that has
 519                  * entries with the same name, one blob and one tree.  Make
 520                  * sure we do not have duplicate entries.
 521                  */
 522                 return TREE_HAS_DUPS;
 523         if (!c1 && S_ISDIR(mode1))
 524                 c1 = '/';
 525         if (!c2 && S_ISDIR(mode2))
 526                 c2 = '/';
 527
 528         /*
 529          * There can be non-consecutive duplicates due to the implicitly
 530          * added slash, e.g.:
 531          *
 532          *   foo
 533          *   foo.bar
 534          *   foo.bar.baz
 535          *   foo.bar/
 536          *   foo/
 537          *
 538          * Record non-directory candidates (like "foo" and "foo.bar" in
 539          * the example) on a stack and check directory candidates (like
 540          * foo/" and "foo.bar/") against that stack.
 541          */
 542         if (!c1 && is_less_than_slash(c2)) {
 543                 name_stack_push(candidates, name1);
 544         } else if (c2 == '/' && is_less_than_slash(c1)) {
 545                 for (;;) {
 546                         const char *p;
 547                         const char *f_name = name_stack_pop(candidates);
 548
 549                         if (!f_name)
 550                                 break;
 551                         if (!skip_prefix(name2, f_name, &p))
 552                                 continue;
 553                         if (!*p)
 554                                 return TREE_HAS_DUPS;
 555                         if (is_less_than_slash(*p)) {
 556                                 name_stack_push(candidates, f_name);
 557                                 break;
 558                         }
 559                 }
 560         }
 561
 562         return c1 < c2 ? 0 : TREE_UNORDERED;
 563 }
 564
 565 static int fsck_tree(const struct object_id *tree_oid,
 566                      const char *buffer, unsigned long size,
 567                      struct fsck_options *options)
 568 {
 569         int retval = 0;
 570         int has_null_sha1 = 0;
 571         int has_full_path = 0;
 572         int has_empty_name = 0;
 573         int has_dot = 0;
 574         int has_dotdot = 0;
 575         int has_dotgit = 0;
 576         int has_zero_pad = 0;
 577         int has_bad_modes = 0;
 578         int has_dup_entries = 0;
 579         int not_properly_sorted = 0;
 580         struct tree_desc desc;
 581         unsigned o_mode;
 582         const char *o_name;
 583         struct name_stack df_dup_candidates = { NULL };
 584
 585         if (init_tree_desc_gently(&desc, buffer, size, TREE_DESC_RAW_MODES)) {
 586                 retval += report(options, tree_oid, OBJ_TREE,
 587                                  FSCK_MSG_BAD_TREE,
 588                                  "cannot be parsed as a tree");
 589                 return retval;
 590         }
 591
 592         o_mode = 0;
 593         o_name = NULL;
 594
 595         while (desc.size) {
 596                 unsigned short mode;
 597                 const char *name, *backslash;
 598                 const struct object_id *entry_oid;
 599
 600                 entry_oid = tree_entry_extract(&desc, &name, &mode);
 601
 602                 has_null_sha1 |= is_null_oid(entry_oid);
 603                 has_full_path |= !!strchr(name, '/');
 604                 has_empty_name |= !*name;
 605                 has_dot |= !strcmp(name, ".");
 606                 has_dotdot |= !strcmp(name, "..");
 607                 has_dotgit |= is_hfs_dotgit(name) || is_ntfs_dotgit(name);
 608                 has_zero_pad |= *(char *)desc.buffer == '0';
 609
 610                 if (is_hfs_dotgitmodules(name) || is_ntfs_dotgitmodules(name)) {
 611                         if (!S_ISLNK(mode))
 612                                 oidset_insert(&options->gitmodules_found,
 613                                               entry_oid);
 614                         else
 615                                 retval += report(options,
 616                                                  tree_oid, OBJ_TREE,
 617                                                  FSCK_MSG_GITMODULES_SYMLINK,
 618                                                  ".gitmodules is a symbolic link");
 619                 }
 620
 621                 if (is_hfs_dotgitattributes(name) || is_ntfs_dotgitattributes(name)) {
 622                         if (!S_ISLNK(mode))
 623                                 oidset_insert(&options->gitattributes_found,
 624                                               entry_oid);
 625                         else
 626                                 retval += report(options, tree_oid, OBJ_TREE,
 627                                                  FSCK_MSG_GITATTRIBUTES_SYMLINK,
 628                                                  ".gitattributes is a symlink");
 629                 }
 630
 631                 if (S_ISLNK(mode)) {
 632                         if (is_hfs_dotgitignore(name) ||
 633                             is_ntfs_dotgitignore(name))
 634                                 retval += report(options, tree_oid, OBJ_TREE,
 635                                                  FSCK_MSG_GITIGNORE_SYMLINK,
 636                                                  ".gitignore is a symlink");
 637                         if (is_hfs_dotmailmap(name) ||
 638                             is_ntfs_dotmailmap(name))
 639                                 retval += report(options, tree_oid, OBJ_TREE,
 640                                                  FSCK_MSG_MAILMAP_SYMLINK,
 641                                                  ".mailmap is a symlink");
 642                         oidset_insert(&options->symlink_targets_found,
 643                                       entry_oid);
 644                 }
 645
 646                 if ((backslash = strchr(name, '\\'))) {
 647                         while (backslash) {
 648                                 backslash++;
 649                                 has_dotgit |= is_ntfs_dotgit(backslash);
 650                                 if (is_ntfs_dotgitmodules(backslash)) {
 651                                         if (!S_ISLNK(mode))
 652                                                 oidset_insert(&options->gitmodules_found,
 653                                                               entry_oid);
 654                                         else
 655                                                 retval += report(options, tree_oid, OBJ_TREE,
 656                                                                  FSCK_MSG_GITMODULES_SYMLINK,
 657                                                                  ".gitmodules is a symbolic link");
 658                                 }
 659                                 backslash = strchr(backslash, '\\');
 660                         }
 661                 }
 662
 663                 if (update_tree_entry_gently(&desc)) {
 664                         retval += report(options, tree_oid, OBJ_TREE,
 665                                          FSCK_MSG_BAD_TREE,
 666                                          "cannot be parsed as a tree");
 667                         break;
 668                 }
 669
 670                 switch (mode) {
 671                 /*
 672                  * Standard modes..
 673                  */
 674                 case S_IFREG | 0755:
 675                 case S_IFREG | 0644:
 676                 case S_IFLNK:
 677                 case S_IFDIR:
 678                 case S_IFGITLINK:
 679                         break;
 680                 /*
 681                  * This is nonstandard, but we had a few of these
 682                  * early on when we honored the full set of mode
 683                  * bits..
 684                  */
 685                 case S_IFREG | 0664:
 686                         if (!options->strict)
 687                                 break;
 688                         /* fallthrough */
 689                 default:
 690                         has_bad_modes = 1;
 691                 }
 692
 693                 if (o_name) {
 694                         switch (verify_ordered(o_mode, o_name, mode, name,
 695                                                &df_dup_candidates)) {
 696                         case TREE_UNORDERED:
 697                                 not_properly_sorted = 1;
 698                                 break;
 699                         case TREE_HAS_DUPS:
 700                                 has_dup_entries = 1;
 701                                 break;
 702                         default:
 703                                 break;
 704                         }
 705                 }
 706
 707                 o_mode = mode;
 708                 o_name = name;
 709         }
 710
 711         name_stack_clear(&df_dup_candidates);
 712
 713         if (has_null_sha1)
 714                 retval += report(options, tree_oid, OBJ_TREE,
 715                                  FSCK_MSG_NULL_SHA1,
 716                                  "contains entries pointing to null sha1");
 717         if (has_full_path)
 718                 retval += report(options, tree_oid, OBJ_TREE,
 719                                  FSCK_MSG_FULL_PATHNAME,
 720                                  "contains full pathnames");
 721         if (has_empty_name)
 722                 retval += report(options, tree_oid, OBJ_TREE,
 723                                  FSCK_MSG_EMPTY_NAME,
 724                                  "contains empty pathname");
 725         if (has_dot)
 726                 retval += report(options, tree_oid, OBJ_TREE,
 727                                  FSCK_MSG_HAS_DOT,
 728                                  "contains '.'");
 729         if (has_dotdot)
 730                 retval += report(options, tree_oid, OBJ_TREE,
 731                                  FSCK_MSG_HAS_DOTDOT,
 732                                  "contains '..'");
 733         if (has_dotgit)
 734                 retval += report(options, tree_oid, OBJ_TREE,
 735                                  FSCK_MSG_HAS_DOTGIT,
 736                                  "contains '.git'");
 737         if (has_zero_pad)
 738                 retval += report(options, tree_oid, OBJ_TREE,
 739                                  FSCK_MSG_ZERO_PADDED_FILEMODE,
 740                                  "contains zero-padded file modes");
 741         if (has_bad_modes)
 742                 retval += report(options, tree_oid, OBJ_TREE,
 743                                  FSCK_MSG_BAD_FILEMODE,
 744                                  "contains bad file modes");
 745         if (has_dup_entries)
 746                 retval += report(options, tree_oid, OBJ_TREE,
 747                                  FSCK_MSG_DUPLICATE_ENTRIES,
 748                                  "contains duplicate file entries");
 749         if (not_properly_sorted)
 750                 retval += report(options, tree_oid, OBJ_TREE,
 751                                  FSCK_MSG_TREE_NOT_SORTED,
 752                                  "not properly sorted");
 753         return retval;
 754 }
 755
 756 /*
 757  * Confirm that the headers of a commit or tag object end in a reasonable way,
 758  * either with the usual "\n\n" separator, or at least with a trailing newline
 759  * on the final header line.
 760  *
 761  * This property is important for the memory safety of our callers. It allows
 762  * them to scan the buffer linewise without constantly checking the remaining
 763  * size as long as:
 764  *
 765  *   - they check that there are bytes left in the buffer at the start of any
 766  *     line (i.e., that the last newline they saw was not the final one we
 767  *     found here)
 768  *
 769  *   - any intra-line scanning they do will stop at a newline, which will worst
 770  *     case hit the newline we found here as the end-of-header. This makes it
 771  *     OK for them to use helpers like parse_oid_hex(), or even skip_prefix().
 772  */
 773 static int verify_headers(const void *data, unsigned long size,
 774                           const struct object_id *oid, enum object_type type,
 775                           struct fsck_options *options)
 776 {
 777         const char *buffer = (const char *)data;
 778         unsigned long i;
 779
 780         for (i = 0; i < size; i++) {
 781                 switch (buffer[i]) {
 782                 case '\0':
 783                         return report(options, oid, type,
 784                                 FSCK_MSG_NUL_IN_HEADER,
 785                                 "unterminated header: NUL at offset %ld", i);
 786                 case '\n':
 787                         if (i + 1 < size && buffer[i + 1] == '\n')
 788                                 return 0;
 789                 }
 790         }
 791
 792         /*
 793          * We did not find double-LF that separates the header
 794          * and the body.  Not having a body is not a crime but
 795          * we do want to see the terminating LF for the last header
 796          * line.
 797          */
 798         if (size && buffer[size - 1] == '\n')
 799                 return 0;
 800
 801         return report(options, oid, type,
 802                 FSCK_MSG_UNTERMINATED_HEADER, "unterminated header");
 803 }
 804
 805 static int fsck_ident(const char **ident,
 806                       const struct object_id *oid, enum object_type type,
 807                       struct fsck_options *options)
 808 {
 809         const char *p = *ident;
 810         char *end;
 811
 812         *ident = strchrnul(*ident, '\n');
 813         if (**ident == '\n')
 814                 (*ident)++;
 815
 816         if (*p == '<')
 817                 return report(options, oid, type, FSCK_MSG_MISSING_NAME_BEFORE_EMAIL, "invalid author/committer line - missing space before email");
 818         p += strcspn(p, "<>\n");
 819         if (*p == '>')
 820                 return report(options, oid, type, FSCK_MSG_BAD_NAME, "invalid author/committer line - bad name");
 821         if (*p != '<')
 822                 return report(options, oid, type, FSCK_MSG_MISSING_EMAIL, "invalid author/committer line - missing email");
 823         if (p[-1] != ' ')
 824                 return report(options, oid, type, FSCK_MSG_MISSING_SPACE_BEFORE_EMAIL, "invalid author/committer line - missing space before email");
 825         p++;
 826         p += strcspn(p, "<>\n");
 827         if (*p != '>')
 828                 return report(options, oid, type, FSCK_MSG_BAD_EMAIL, "invalid author/committer line - bad email");
 829         p++;
 830         if (*p != ' ')
 831                 return report(options, oid, type, FSCK_MSG_MISSING_SPACE_BEFORE_DATE, "invalid author/committer line - missing space before date");
 832         p++;
 833         /*
 834          * Our timestamp parser is based on the C strto*() functions, which
 835          * will happily eat whitespace, including the newline that is supposed
 836          * to prevent us walking past the end of the buffer. So do our own
 837          * scan, skipping linear whitespace but not newlines, and then
 838          * confirming we found a digit. We _could_ be even more strict here,
 839          * as we really expect only a single space, but since we have
 840          * traditionally allowed extra whitespace, we'll continue to do so.
 841          */
 842         while (*p == ' ' || *p == '\t')
 843                 p++;
 844         if (!isdigit(*p))
 845                 return report(options, oid, type, FSCK_MSG_BAD_DATE,
 846                               "invalid author/committer line - bad date");
 847         if (*p == '0' && p[1] != ' ')
 848                 return report(options, oid, type, FSCK_MSG_ZERO_PADDED_DATE, "invalid author/committer line - zero-padded date");
 849         if (date_overflows(parse_timestamp(p, &end, 10)))
 850                 return report(options, oid, type, FSCK_MSG_BAD_DATE_OVERFLOW, "invalid author/committer line - date causes integer overflow");
 851         if ((end == p || *end != ' '))
 852                 return report(options, oid, type, FSCK_MSG_BAD_DATE, "invalid author/committer line - bad date");
 853         p = end + 1;
 854         if ((*p != '+' && *p != '-') ||
 855             !isdigit(p[1]) ||
 856             !isdigit(p[2]) ||
 857             !isdigit(p[3]) ||
 858             !isdigit(p[4]) ||
 859             (p[5] != '\n'))
 860                 return report(options, oid, type, FSCK_MSG_BAD_TIMEZONE, "invalid author/committer line - bad time zone");
 861         p += 6;
 862         return 0;
 863 }
 864
 865 static int fsck_commit(const struct object_id *oid,
 866                        const char *buffer, unsigned long size,
 867                        struct fsck_options *options)
 868 {
 869         struct object_id tree_oid, parent_oid;
 870         unsigned author_count;
 871         int err;
 872         const char *buffer_begin = buffer;
 873         const char *buffer_end = buffer + size;
 874         const char *p;
 875
 876         /*
 877          * We _must_ stop parsing immediately if this reports failure, as the
 878          * memory safety of the rest of the function depends on it. See the
 879          * comment above the definition of verify_headers() for more details.
 880          */
 881         if (verify_headers(buffer, size, oid, OBJ_COMMIT, options))
 882                 return -1;
 883
 884         if (buffer >= buffer_end || !skip_prefix(buffer, "tree ", &buffer))
 885                 return report(options, oid, OBJ_COMMIT, FSCK_MSG_MISSING_TREE, "invalid format - expected 'tree' line");
 886         if (parse_oid_hex(buffer, &tree_oid, &p) || *p != '\n') {
 887                 err = report(options, oid, OBJ_COMMIT, FSCK_MSG_BAD_TREE_SHA1, "invalid 'tree' line format - bad sha1");
 888                 if (err)
 889                         return err;
 890         }
 891         buffer = p + 1;
 892         while (buffer < buffer_end && skip_prefix(buffer, "parent ", &buffer)) {
 893                 if (parse_oid_hex(buffer, &parent_oid, &p) || *p != '\n') {
 894                         err = report(options, oid, OBJ_COMMIT, FSCK_MSG_BAD_PARENT_SHA1, "invalid 'parent' line format - bad sha1");
 895                         if (err)
 896                                 return err;
 897                 }
 898                 buffer = p + 1;
 899         }
 900         author_count = 0;
 901         while (buffer < buffer_end && skip_prefix(buffer, "author ", &buffer)) {
 902                 author_count++;
 903                 err = fsck_ident(&buffer, oid, OBJ_COMMIT, options);
 904                 if (err)
 905                         return err;
 906         }
 907         if (author_count < 1)
 908                 err = report(options, oid, OBJ_COMMIT, FSCK_MSG_MISSING_AUTHOR, "invalid format - expected 'author' line");
 909         else if (author_count > 1)
 910                 err = report(options, oid, OBJ_COMMIT, FSCK_MSG_MULTIPLE_AUTHORS, "invalid format - multiple 'author' lines");
 911         if (err)
 912                 return err;
 913         if (buffer >= buffer_end || !skip_prefix(buffer, "committer ", &buffer))
 914                 return report(options, oid, OBJ_COMMIT, FSCK_MSG_MISSING_COMMITTER, "invalid format - expected 'committer' line");
 915         err = fsck_ident(&buffer, oid, OBJ_COMMIT, options);
 916         if (err)
 917                 return err;
 918         if (memchr(buffer_begin, '\0', size)) {
 919                 err = report(options, oid, OBJ_COMMIT, FSCK_MSG_NUL_IN_COMMIT,
 920                              "NUL byte in the commit object body");
 921                 if (err)
 922                         return err;
 923         }
 924         return 0;
 925 }
 926
 927 static int fsck_tag(const struct object_id *oid, const char *buffer,
 928                     unsigned long size, struct fsck_options *options)
 929 {
 930         struct object_id tagged_oid;
 931         int tagged_type;
 932         return fsck_tag_standalone(oid, buffer, size, options, &tagged_oid,
 933                                    &tagged_type);
 934 }
 935
 936 int fsck_tag_standalone(const struct object_id *oid, const char *buffer,
 937                         unsigned long size, struct fsck_options *options,
 938                         struct object_id *tagged_oid,
 939                         int *tagged_type)
 940 {
 941         int ret = 0;
 942         char *eol;
 943         struct strbuf sb = STRBUF_INIT;
 944         const char *buffer_end = buffer + size;
 945         const char *p;
 946
 947         /*
 948          * We _must_ stop parsing immediately if this reports failure, as the
 949          * memory safety of the rest of the function depends on it. See the
 950          * comment above the definition of verify_headers() for more details.
 951          */
 952         ret = verify_headers(buffer, size, oid, OBJ_TAG, options);
 953         if (ret)
 954                 goto done;
 955
 956         if (buffer >= buffer_end || !skip_prefix(buffer, "object ", &buffer)) {
 957                 ret = report(options, oid, OBJ_TAG, FSCK_MSG_MISSING_OBJECT, "invalid format - expected 'object' line");
 958                 goto done;
 959         }
 960         if (parse_oid_hex(buffer, tagged_oid, &p) || *p != '\n') {
 961                 ret = report(options, oid, OBJ_TAG, FSCK_MSG_BAD_OBJECT_SHA1, "invalid 'object' line format - bad sha1");
 962                 if (ret)
 963                         goto done;
 964         }
 965         buffer = p + 1;
 966
 967         if (buffer >= buffer_end || !skip_prefix(buffer, "type ", &buffer)) {
 968                 ret = report(options, oid, OBJ_TAG, FSCK_MSG_MISSING_TYPE_ENTRY, "invalid format - expected 'type' line");
 969                 goto done;
 970         }
 971         eol = memchr(buffer, '\n', buffer_end - buffer);
 972         if (!eol) {
 973                 ret = report(options, oid, OBJ_TAG, FSCK_MSG_MISSING_TYPE, "invalid format - unexpected end after 'type' line");
 974                 goto done;
 975         }
 976         *tagged_type = type_from_string_gently(buffer, eol - buffer, 1);
 977         if (*tagged_type < 0)
 978                 ret = report(options, oid, OBJ_TAG, FSCK_MSG_BAD_TYPE, "invalid 'type' value");
 979         if (ret)
 980                 goto done;
 981         buffer = eol + 1;
 982
 983         if (buffer >= buffer_end || !skip_prefix(buffer, "tag ", &buffer)) {
 984                 ret = report(options, oid, OBJ_TAG, FSCK_MSG_MISSING_TAG_ENTRY, "invalid format - expected 'tag' line");
 985                 goto done;
 986         }
 987         eol = memchr(buffer, '\n', buffer_end - buffer);
 988         if (!eol) {
 989                 ret = report(options, oid, OBJ_TAG, FSCK_MSG_MISSING_TAG, "invalid format - unexpected end after 'type' line");
 990                 goto done;
 991         }
 992         strbuf_addf(&sb, "refs/tags/%.*s", (int)(eol - buffer), buffer);
 993         if (check_refname_format(sb.buf, 0)) {
 994                 ret = report(options, oid, OBJ_TAG,
 995                              FSCK_MSG_BAD_TAG_NAME,
 996                              "invalid 'tag' name: %.*s",
 997                              (int)(eol - buffer), buffer);
 998                 if (ret)
 999                         goto done;
1000         }
1001         buffer = eol + 1;
1002
1003         if (buffer >= buffer_end || !skip_prefix(buffer, "tagger ", &buffer)) {
1004                 /* early tags do not contain 'tagger' lines; warn only */
1005                 ret = report(options, oid, OBJ_TAG, FSCK_MSG_MISSING_TAGGER_ENTRY, "invalid format - expected 'tagger' line");
1006                 if (ret)
1007                         goto done;
1008         }
1009         else
1010                 ret = fsck_ident(&buffer, oid, OBJ_TAG, options);
1011
1012         if (buffer < buffer_end && !starts_with(buffer, "\n")) {
1013                 /*
1014                  * The verify_headers() check will allow
1015                  * e.g. "[...]tagger <tagger>\nsome
1016                  * garbage\n\nmessage" to pass, thinking "some
1017                  * garbage" could be a custom header. E.g. "mktag"
1018                  * doesn't want any unknown headers.
1019                  */
1020                 ret = report(options, oid, OBJ_TAG, FSCK_MSG_EXTRA_HEADER_ENTRY, "invalid format - extra header(s) after 'tagger'");
1021                 if (ret)
1022                         goto done;
1023         }
1024
1025 done:
1026         strbuf_release(&sb);
1027         return ret;
1028 }
1029
1030 static int starts_with_dot_slash(const char *const path)
1031 {
1032         return path_match_flags(path, PATH_MATCH_STARTS_WITH_DOT_SLASH |
1033                                 PATH_MATCH_XPLATFORM);
1034 }
1035
1036 static int starts_with_dot_dot_slash(const char *const path)
1037 {
1038         return path_match_flags(path, PATH_MATCH_STARTS_WITH_DOT_DOT_SLASH |
1039                                 PATH_MATCH_XPLATFORM);
1040 }
1041
/*
 * Return 1 if url begins with a "./" or "../" component, 0 otherwise.
 */
static int submodule_url_is_relative(const char *url)
{
	if (starts_with_dot_slash(url))
		return 1;
	return starts_with_dot_dot_slash(url) ? 1 : 0;
}
1046
/*
 * Work out how many directory components a relative submodule URL wants
 * removed from the remote_url it will be resolved against, i.e. how many
 * "../" components it starts with.
 *
 * Leading "./" components are skipped without being counted. On return,
 * *out points at the first character of url that follows every leading
 * "./" and "../" component, and the return value is the "../" count.
 */
static int count_leading_dotdots(const char *url, const char **out)
{
	int chopped = 0;

	for (;;) {
		if (starts_with_dot_dot_slash(url)) {
			url += strlen("../");
			chopped++;
		} else if (starts_with_dot_slash(url)) {
			url += strlen("./");
		} else {
			break;
		}
	}

	*out = url;
	return chopped;
}
/*
 * Decide whether a URL's transport is handled by git-remote-curl.
 *
 * When it is, 1 is returned and "out" receives the URL that
 * git-remote-curl would be given. When it is not, 0 is returned and
 * "out" is left alone.
 *
 * Examples:
 *   http::https://example.com/repo.git -> 1, https://example.com/repo.git
 *   https://example.com/repo.git -> 1, https://example.com/repo.git
 *   git://example.com/repo.git -> 0
 *
 * The purpose is to check for previously exploitable bugs that needed a
 * submodule URL to reach git-remote-curl.
 */
static int url_to_curl_url(const char *url, const char **out)
{
	/*
	 * Case-aliases, "http.exe" and the like need no handling here: in
	 * the default configuration is_transport_allowed keeps URLs with
	 * such schemes from being cloned automatically.
	 */
	static const char *const helper_prefixes[] = {
		"http::", "https::", "ftp::", "ftps::",
	};
	static const char *const scheme_prefixes[] = {
		"http://", "https://", "ftp://", "ftps://",
	};
	size_t i;

	/* "<helper>::<url>": hand over whatever follows the "::". */
	for (i = 0; i < sizeof(helper_prefixes) / sizeof(helper_prefixes[0]); i++) {
		if (skip_prefix(url, helper_prefixes[i], out))
			return 1;
	}

	/* Plain curl-backed scheme: the URL is passed on unchanged. */
	for (i = 0; i < sizeof(scheme_prefixes) / sizeof(scheme_prefixes[0]); i++) {
		if (starts_with(url, scheme_prefixes[i])) {
			*out = url;
			return 1;
		}
	}

	return 0;
}
1113
1114 static int check_submodule_url(const char *url)
1115 {
1116         const char *curl_url;
1117
1118         if (looks_like_command_line_option(url))
1119                 return -1;
1120
1121         if (submodule_url_is_relative(url) || starts_with(url, "git://")) {
1122                 char *decoded;
1123                 const char *next;
1124                 int has_nl;
1125
1126                 /*
1127                  * This could be appended to an http URL and url-decoded;
1128                  * check for malicious characters.
1129                  */
1130                 decoded = url_decode(url);
1131                 has_nl = !!strchr(decoded, '\n');
1132
1133                 free(decoded);
1134                 if (has_nl)
1135                         return -1;
1136
1137                 /*
1138                  * URLs which escape their root via "../" can overwrite
1139                  * the host field and previous components, resolving to
1140                  * URLs like https::example.com/submodule.git and
1141                  * https:///example.com/submodule.git that were
1142                  * susceptible to CVE-2020-11008.
1143                  */
1144                 if (count_leading_dotdots(url, &next) > 0 &&
1145                     (*next == ':' || *next == '/'))
1146                         return -1;
1147         }
1148
1149         else if (url_to_curl_url(url, &curl_url)) {
1150                 struct credential c = CREDENTIAL_INIT;
1151                 int ret = 0;
1152                 if (credential_from_url_gently(&c, curl_url, 1) ||
1153                     !*c.host)
1154                         ret = -1;
1155                 credential_clear(&c);
1156                 return ret;
1157         }
1158
1159         return 0;
1160 }
1161
/*
 * State shared between fsck_blob() and fsck_gitmodules_fn() while one
 * .gitmodules blob is being parsed.
 */
struct fsck_gitmodules_data {
	/* id of the blob being checked; passed on to report() */
	const struct object_id *oid;
	/* fsck options passed on to report() */
	struct fsck_options *options;
	/* bitwise OR of every report() result collected by the callback */
	int ret;
};
1167
1168 static int fsck_gitmodules_fn(const char *var, const char *value, void *vdata)
1169 {
1170         struct fsck_gitmodules_data *data = vdata;
1171         const char *subsection, *key;
1172         size_t subsection_len;
1173         char *name;
1174
1175         if (parse_config_key(var, "submodule", &subsection, &subsection_len, &key) < 0 ||
1176             !subsection)
1177                 return 0;
1178
1179         name = xmemdupz(subsection, subsection_len);
1180         if (check_submodule_name(name) < 0)
1181                 data->ret |= report(data->options,
1182                                     data->oid, OBJ_BLOB,
1183                                     FSCK_MSG_GITMODULES_NAME,
1184                                     "disallowed submodule name: %s",
1185                                     name);
1186         if (!strcmp(key, "url") && value &&
1187             check_submodule_url(value) < 0)
1188                 data->ret |= report(data->options,
1189                                     data->oid, OBJ_BLOB,
1190                                     FSCK_MSG_GITMODULES_URL,
1191                                     "disallowed submodule url: %s",
1192                                     value);
1193         if (!strcmp(key, "path") && value &&
1194             looks_like_command_line_option(value))
1195                 data->ret |= report(data->options,
1196                                     data->oid, OBJ_BLOB,
1197                                     FSCK_MSG_GITMODULES_PATH,
1198                                     "disallowed submodule path: %s",
1199                                     value);
1200         if (!strcmp(key, "update") && value &&
1201             parse_submodule_update_type(value) == SM_UPDATE_COMMAND)
1202                 data->ret |= report(data->options, data->oid, OBJ_BLOB,
1203                                     FSCK_MSG_GITMODULES_UPDATE,
1204                                     "disallowed submodule update setting: %s",
1205                                     value);
1206         free(name);
1207
1208         return 0;
1209 }
1210
1211 static int fsck_blob(const struct object_id *oid, const char *buf,
1212                      unsigned long size, struct fsck_options *options)
1213 {
1214         int ret = 0;
1215
1216         if (object_on_skiplist(options, oid))
1217                 return 0;
1218
1219         if (oidset_contains(&options->gitmodules_found, oid)) {
1220                 struct config_options config_opts = { 0 };
1221                 struct fsck_gitmodules_data data;
1222
1223                 oidset_insert(&options->gitmodules_done, oid);
1224
1225                 if (!buf) {
1226                         /*
1227                          * A missing buffer here is a sign that the caller found the
1228                          * blob too gigantic to load into memory. Let's just consider
1229                          * that an error.
1230                          */
1231                         return report(options, oid, OBJ_BLOB,
1232                                         FSCK_MSG_GITMODULES_LARGE,
1233                                         ".gitmodules too large to parse");
1234                 }
1235
1236                 data.oid = oid;
1237                 data.options = options;
1238                 data.ret = 0;
1239                 config_opts.error_action = CONFIG_ERROR_SILENT;
1240                 if (git_config_from_mem(fsck_gitmodules_fn, CONFIG_ORIGIN_BLOB,
1241                                         ".gitmodules", buf, size, &data, &config_opts))
1242                         data.ret |= report(options, oid, OBJ_BLOB,
1243                                         FSCK_MSG_GITMODULES_PARSE,
1244                                         "could not parse gitmodules blob");
1245                 ret |= data.ret;
1246         }
1247
1248         if (oidset_contains(&options->gitattributes_found, oid)) {
1249                 const char *ptr;
1250
1251                 oidset_insert(&options->gitattributes_done, oid);
1252
1253                 if (!buf || size > ATTR_MAX_FILE_SIZE) {
1254                         /*
1255                          * A missing buffer here is a sign that the caller found the
1256                          * blob too gigantic to load into memory. Let's just consider
1257                          * that an error.
1258                          */
1259                         return report(options, oid, OBJ_BLOB,
1260                                         FSCK_MSG_GITATTRIBUTES_LARGE,
1261                                         ".gitattributes too large to parse");
1262                 }
1263
1264                 for (ptr = buf; *ptr; ) {
1265                         const char *eol = strchrnul(ptr, '\n');
1266                         if (eol - ptr >= ATTR_MAX_LINE_LENGTH) {
1267                                 ret |= report(options, oid, OBJ_BLOB,
1268                                               FSCK_MSG_GITATTRIBUTES_LINE_LENGTH,
1269                                               ".gitattributes has too long lines to parse");
1270                                 break;
1271                         }
1272
1273                         ptr = *eol ? eol + 1 : eol;
1274                 }
1275         }
1276
1277         if (oidset_contains(&options->symlink_targets_found, oid)) {
1278                 const char *ptr = buf;
1279                 const struct object_id *reported = NULL;
1280
1281                 oidset_insert(&options->symlink_targets_done, oid);
1282
1283                 if (!buf || size > PATH_MAX) {
1284                         /*
1285                          * A missing buffer here is a sign that the caller found the
1286                          * blob too gigantic to load into memory. Let's just consider
1287                          * that an error.
1288                          */
1289                         return report(options, oid, OBJ_BLOB,
1290                                         FSCK_MSG_SYMLINK_TARGET_LENGTH,
1291                                         "symlink target too long");
1292                 }
1293
1294                 while (!reported && ptr) {
1295                         const char *p = ptr;
1296                         char c, *slash = strchrnul(ptr, '/');
1297                         char *backslash = memchr(ptr, '\\', slash - ptr);
1298
1299                         c = *slash;
1300                         *slash = '\0';
1301
1302                         while (!reported && backslash) {
1303                                 *backslash = '\0';
1304                                 if (is_ntfs_dotgit(p))
1305                                         ret |= report(options, reported = oid, OBJ_BLOB,
1306                                                       FSCK_MSG_SYMLINK_POINTS_TO_GIT_DIR,
1307                                                       "symlink target points to git dir");
1308                                 *backslash = '\\';
1309                                 p = backslash + 1;
1310                                 backslash = memchr(p, '\\', slash - p);
1311                         }
1312                         if (!reported && is_ntfs_dotgit(p))
1313                                 ret |= report(options, reported = oid, OBJ_BLOB,
1314                                               FSCK_MSG_SYMLINK_POINTS_TO_GIT_DIR,
1315                                               "symlink target points to git dir");
1316
1317                         if (!reported && is_hfs_dotgit(ptr))
1318                                 ret |= report(options, reported = oid, OBJ_BLOB,
1319                                               FSCK_MSG_SYMLINK_POINTS_TO_GIT_DIR,
1320                                               "symlink target points to git dir");
1321
1322                         *slash = c;
1323                         ptr = c ? slash + 1 : NULL;
1324                 }
1325         }
1326
1327         return ret;
1328 }
1329
1330 int fsck_object(struct object *obj, void *data, unsigned long size,
1331         struct fsck_options *options)
1332 {
1333         if (!obj)
1334                 return report(options, NULL, OBJ_NONE, FSCK_MSG_BAD_OBJECT_SHA1, "no valid object to fsck");
1335
1336         return fsck_buffer(&obj->oid, obj->type, data, size, options);
1337 }
1338
1339 int fsck_buffer(const struct object_id *oid, enum object_type type,
1340                 void *data, unsigned long size,
1341                 struct fsck_options *options)
1342 {
1343         if (type == OBJ_BLOB)
1344                 return fsck_blob(oid, data, size, options);
1345         if (type == OBJ_TREE)
1346                 return fsck_tree(oid, data, size, options);
1347         if (type == OBJ_COMMIT)
1348                 return fsck_commit(oid, data, size, options);
1349         if (type == OBJ_TAG)
1350                 return fsck_tag(oid, data, size, options);
1351
1352         return report(options, oid, type,
1353                       FSCK_MSG_UNKNOWN_TYPE,
1354                       "unknown type '%d' (internal fsck error)",
1355                       type);
1356 }
1357
1358 int fsck_error_function(struct fsck_options *o,
1359                         const struct object_id *oid,
1360                         enum object_type object_type,
1361                         enum fsck_msg_type msg_type,
1362                         enum fsck_msg_id msg_id,
1363                         const char *message)
1364 {
1365         if (msg_type == FSCK_WARN) {
1366                 warning("object %s: %s", fsck_describe_object(o, oid), message);
1367                 return 0;
1368         }
1369         error("object %s: %s", fsck_describe_object(o, oid), message);
1370         return 1;
1371 }
1372
1373 static int fsck_blobs(struct oidset *blobs_found, struct oidset *blobs_done,
1374                       enum fsck_msg_id msg_missing, enum fsck_msg_id msg_type,
1375                       struct fsck_options *options, const char *blob_type)
1376 {
1377         int ret = 0;
1378         struct oidset_iter iter;
1379         const struct object_id *oid;
1380
1381         oidset_iter_init(blobs_found, &iter);
1382         while ((oid = oidset_iter_next(&iter))) {
1383                 enum object_type type;
1384                 unsigned long size;
1385                 char *buf;
1386
1387                 if (oidset_contains(blobs_done, oid))
1388                         continue;
1389
1390                 buf = repo_read_object_file(the_repository, oid, &type, &size);
1391                 if (!buf) {
1392                         if (is_promisor_object(oid))
1393                                 continue;
1394                         ret |= report(options,
1395                                       oid, OBJ_BLOB, msg_missing,
1396                                       "unable to read %s blob", blob_type);
1397                         continue;
1398                 }
1399
1400                 if (type == OBJ_BLOB)
1401                         ret |= fsck_blob(oid, buf, size, options);
1402                 else
1403                         ret |= report(options, oid, type, msg_type,
1404                                       "non-blob found at %s", blob_type);
1405                 free(buf);
1406         }
1407
1408         oidset_clear(blobs_found);
1409         oidset_clear(blobs_done);
1410
1411         return ret;
1412 }
1413
1414 int fsck_finish(struct fsck_options *options)
1415 {
1416         int ret = 0;
1417
1418         ret |= fsck_blobs(&options->gitmodules_found, &options->gitmodules_done,
1419                           FSCK_MSG_GITMODULES_MISSING, FSCK_MSG_GITMODULES_BLOB,
1420                           options, ".gitmodules");
1421         ret |= fsck_blobs(&options->gitattributes_found, &options->gitattributes_done,
1422                           FSCK_MSG_GITATTRIBUTES_MISSING, FSCK_MSG_GITATTRIBUTES_BLOB,
1423                           options, ".gitattributes");
1424
1425         ret |= fsck_blobs(&options->symlink_targets_found, &options->symlink_targets_done,
1426                           FSCK_MSG_SYMLINK_TARGET_MISSING, FSCK_MSG_SYMLINK_TARGET_BLOB,
1427                           options, "<symlink-target>");
1428
1429         return ret;
1430 }
1431
1432 int git_fsck_config(const char *var, const char *value, void *cb)
1433 {
1434         struct fsck_options *options = cb;
1435         if (strcmp(var, "fsck.skiplist") == 0) {
1436                 const char *path;
1437                 struct strbuf sb = STRBUF_INIT;
1438
1439                 if (git_config_pathname(&path, var, value))
1440                         return 1;
1441                 strbuf_addf(&sb, "skiplist=%s", path);
1442                 free((char *)path);
1443                 fsck_set_msg_types(options, sb.buf);
1444                 strbuf_release(&sb);
1445                 return 0;
1446         }
1447
1448         if (skip_prefix(var, "fsck.", &var)) {
1449                 fsck_set_msg_type(options, var, value);
1450                 return 0;
1451         }
1452
1453         return git_default_config(var, value, cb);
1454 }
1455
1456 /*
1457  * Custom error callbacks that are used in more than one place.
1458  */
1459
1460 int fsck_error_cb_print_missing_gitmodules(struct fsck_options *o,
1461                                            const struct object_id *oid,
1462                                            enum object_type object_type,
1463                                            enum fsck_msg_type msg_type,
1464                                            enum fsck_msg_id msg_id,
1465                                            const char *message)
1466 {
1467         if (msg_id == FSCK_MSG_GITMODULES_MISSING) {
1468                 puts(oid_to_hex(oid));
1469                 return 0;
1470         }
1471         return fsck_error_function(o, oid, object_type, msg_type, msg_id, message);
1472 }