/*
 * GIT - The information manager from hell
 *
 * Copyright (C) Linus Torvalds, 2005
 */
#include "cache.h"

static int stage = 0;
static int update = 0;
static int unpack_tree(unsigned char *sha1)
{
	void *buffer;
	unsigned long size;
	int ret;

	buffer = read_object_with_reference(sha1, "tree", &size, NULL);
	if (!buffer)
		return -1;
	ret = read_tree(buffer, size, stage, NULL);
	free(buffer);
	return ret;
}
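/*
 * Note: the object is read with read_object_with_reference() asking
 * for a "tree", so the <sha1> given on the command line does not have
 * to name a tree object directly; a commit (or a tag pointing at one)
 * should get peeled down to its tree by that helper.
 */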
static int path_matches(struct cache_entry *a, struct cache_entry *b)
{
	int len = ce_namelen(a);
	return ce_namelen(b) == len &&
		!memcmp(a->name, b->name, len);
}
static int same(struct cache_entry *a, struct cache_entry *b)
{
	return a->ce_mode == b->ce_mode &&
		!memcmp(a->sha1, b->sha1, 20);
}
/*
 * This removes all trivial merges that don't change the tree
 * and collapses them to state 0.
 */
static struct cache_entry *merge_entries(struct cache_entry *a,
					 struct cache_entry *b,
					 struct cache_entry *c)
{
	/*
	 * Ok, all three entries describe the same
	 * filename, but maybe the contents or file
	 * mode have changed?
	 *
	 * The trivial cases end up being the ones where two
	 * out of three files are the same:
	 *  - both destinations the same, trivially take either
	 *  - one of the destination versions hasn't changed,
	 *    take the other.
	 *
	 * The "all entries exactly the same" case falls out as
	 * a special case of any of the "two same" cases.
	 *
	 * Here "a" is "original", and "b" and "c" are the two
	 * trees we are merging.
	 */
	if (a && b && c) {
		if (same(b, c))
			return b;
		if (same(a, b))
			return c;
		if (same(a, c))
			return b;
	}
	return NULL;
}
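/*
 * For example, if "b" and "c" both turned the path into the same new
 * blob, same(b, c) holds and either one can be taken; if only one of
 * them differs from the original "a", the changed one wins.  Anything
 * else is a real content conflict, NULL is returned, and the caller
 * leaves the path unmerged.
 */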
/*
 * When a CE gets turned into an unmerged entry, we
 * want it to be up-to-date
 */
static void verify_uptodate(struct cache_entry *ce)
{
	struct stat st;

	if (!lstat(ce->name, &st)) {
		unsigned changed = ce_match_stat(ce, &st);
		if (!changed)
			return;
	}
	die("Entry '%s' not uptodate. Cannot merge.", ce->name);
}
/*
 * If the old tree contained a CE that isn't even in the
 * result, that's always a problem, regardless of whether
 * it's up-to-date or not (ie it can be a file that we
 * have updated but not committed yet).
 */
static void reject_merge(struct cache_entry *ce)
{
	die("Entry '%s' would be overwritten by merge. Cannot merge.", ce->name);
}
static int merged_entry_internal(struct cache_entry *merge, struct cache_entry *old, struct cache_entry **dst, int allow_dirty)
{
	merge->ce_flags |= htons(CE_UPDATE);
	if (old) {
		/*
		 * See if we can re-use the old CE directly?
		 * That way we get the uptodate stat info.
		 *
		 * This also removes the UPDATE flag on
		 * a match.
		 */
		if (same(old, merge)) {
			*merge = *old;
		} else if (!allow_dirty) {
			verify_uptodate(old);
		}
	}
	merge->ce_flags &= ~htons(CE_STAGEMASK);
	*dst++ = merge;
	return 1;
}
static int merged_entry_allow_dirty(struct cache_entry *merge, struct cache_entry *old, struct cache_entry **dst)
{
	return merged_entry_internal(merge, old, dst, 1);
}
static int merged_entry(struct cache_entry *merge, struct cache_entry *old, struct cache_entry **dst)
{
	return merged_entry_internal(merge, old, dst, 0);
}
static int deleted_entry(struct cache_entry *ce, struct cache_entry *old, struct cache_entry **dst)
{
	if (old)
		verify_uptodate(old);
	ce->ce_mode = 0;
	*dst++ = ce;
	return 1;
}
static int causes_df_conflict(struct cache_entry *ce, int stage,
			      struct cache_entry **dst_,
			      struct cache_entry **next_,
			      int tail)
{
	/* This is called during the merge operation and walking
	 * the active_cache[] array is messy, because it is in the
	 * middle of an overlapping copy operation.  The invariants
	 * are:
	 * (1) active_cache points at the first (zeroth) entry.
	 * (2) up to the dst pointer are resolved entries.
	 * (3) from the next pointer (head-inclusive) to the tail
	 *     of the active_cache array hold the remaining paths
	 *     to be processed.  There can be a gap between dst
	 *     and next.  Note that next is called "src" in the
	 *     merge_cache() function, and tail is the original
	 *     end of the active_cache array when merge_cache() started.
	 * (4) the path corresponding to *ce is not found in (2)
	 *     or (3).  It is in the gap.
	 *
	 *  active_cache -----......+++++++++++++.
	 *                    ^dst  ^next        ^tail
	 */
	const char *path = ce->name;
	int namelen = ce_namelen(ce);
	int i, next, dst;

	next = next_ - active_cache;
	dst = dst_ - active_cache;

	for (i = 0; i < tail; i++) {
		int entlen, len;
		const char *one, *two;
		if (dst <= i && i < next)
			continue;
		ce = active_cache[i];
		if (ce_stage(ce) != stage)
			continue;
		/* If ce->name is a prefix of path, then path is a file
		 * that hangs underneath ce->name, which is bad.
		 * If path is a prefix of ce->name, then it is the
		 * other way around which also is bad.
		 */
		entlen = ce_namelen(ce);
		if (namelen == entlen)
			continue;
		if (namelen < entlen) {
			len = namelen;
			one = path;
			two = ce->name;
		} else {
			len = entlen;
			one = ce->name;
			two = path;
		}
		if (memcmp(one, two, len))
			continue;
		if (two[len] == '/')
			return 1;
	}
	return 0;
}
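/*
 * Illustration: suppose we are about to resolve a path "DF" taken
 * from stage2 while stage3 still has an entry "DF/DF".  The shorter
 * name is a prefix of the longer one and the character right after
 * the common prefix is '/', so taking both would need "DF" to be a
 * file and a directory at the same time; that is the directory/file
 * conflict this function reports.
 */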
static int threeway_merge(struct cache_entry *stages[4],
			  struct cache_entry **dst,
			  struct cache_entry **next, int tail)
{
	struct cache_entry *old = stages[0];
	struct cache_entry *a = stages[1], *b = stages[2], *c = stages[3];
	struct cache_entry *merge;
	int count;

	if (!a && b && c && same(b, c)) {
		if (old && !same(b, old))
			reject_merge(old);
		return merged_entry_allow_dirty(b, old, dst);
	}
	/* #2ALT and #3ALT */
	if (!a && (!!b != !!c)) {
		/*
		 * The reason we need to worry about directory/file
		 * conflicts only in #2ALT and #3ALT case is this:
		 *
		 * (1) For all other cases that read-tree internally
		 *     resolves a path, we always have such a path in
		 *     *both* stage2 and stage3 when we begin.
		 *     Traditionally, the behaviour has been even
		 *     stricter and we did not resolve a path without
		 *     initially being in all of stage1, 2, and 3.
		 *
		 * (2) When read-tree finishes, all resolved paths (i.e.
		 *     the paths that are in stage0) must have come from
		 *     either stage2 or stage3.  It is not possible to
		 *     have a stage0 path as a result of a merge if
		 *     neither stage2 nor stage3 had that path.
		 *
		 * (3) It is guaranteed that just after reading the
		 *     stages, each stage cannot have directory/file
		 *     conflicts on its own, because they are populated
		 *     by reading the hierarchy of a tree.  Combined with
		 *     (1) and (2) above, this means that no matter what
		 *     combination of paths we take from stage2 and
		 *     stage3 as a result of a merge, they cannot cause
		 *     a directory/file conflict situation (otherwise
		 *     the "guilty" path would have already had such a
		 *     conflict in the original stage, either stage2
		 *     or stage3).  Although its stage2 is synthesized
		 *     by overlaying the current index on top of "our
		 *     head" tree, the --emu23 case also has this guarantee,
		 *     by calling add_cache_entry() to create such stage2
		 *     entries.
		 *
		 * (4) Only #2ALT and #3ALT lack the guarantee (1).
		 *     They resolve paths that exist only in stage2
		 *     or stage3.  The stage2 tree may have a file DF
		 *     while stage3 tree may have a file DF/DF.  If
		 *     #2ALT and #3ALT rules happen to apply to both
		 *     of them, we would end up having DF (coming from
		 *     stage2) and DF/DF (from stage3) in the result.
		 *     When we attempt to resolve a path that exists
		 *     only in stage2, we need to make sure there is
		 *     no path that would conflict with it in stage3,
		 *     and vice versa.
		 */
		if (c) {
			/* #3ALT: the path exists only in stage3 */
			if (!causes_df_conflict(c, 2, dst, next, tail) &&
			    (!old || same(c, old)))
				return merged_entry_allow_dirty(c, old, dst);
		}
		else {
			/* #2ALT: the path exists only in stage2 */
			if (!causes_df_conflict(b, 3, dst, next, tail) &&
			    (!old || same(b, old)))
				return merged_entry_allow_dirty(b, old, dst);
		}
		/* otherwise we will apply the original rule */
	}

	if (a && b && c && same(a, b) && !same(a, c)) {
		if (old && same(old, c))
			return merged_entry_allow_dirty(c, old, dst);
		/* otherwise the regular rule applies */
	}
	/*
	 * If we have an entry in the index cache ("old"), then we want
	 * to make sure that it matches any entries in stage 2 ("first
	 * branch", i.e. "our head").
	 */
	if (old) {
		if (!b || !same(old, b))
			reject_merge(old);
	}
	merge = merge_entries(a, b, c);
	if (merge)
		return merged_entry(merge, old, dst);
	if (old)
		verify_uptodate(old);
	count = 0;
	if (a) { *dst++ = a; count++; }
	if (b) { *dst++ = b; count++; }
	if (c) { *dst++ = c; count++; }
	return count;
}
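/*
 * Note the fall-through at the end: stages that could not be resolved
 * trivially are copied back as they are (still at stage 1, 2 or 3) and
 * their count is returned, so the resulting index records the conflict
 * rather than dropping it.
 */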
/*
 * The rule is to "carry forward" what is in the index without losing
 * information across a "fast forward", favoring a successful merge
 * over a merge failure when it makes sense.  For details of the
 * "carry forward" rule, please see <Documentation/git-read-tree.txt>.
 */
static int twoway_merge(struct cache_entry **src, struct cache_entry **dst,
			struct cache_entry **next, int tail)
{
	struct cache_entry *current = src[0];
	struct cache_entry *oldtree = src[1], *newtree = src[2];

	if (current) {
		if ((!oldtree && !newtree) || /* 4 and 5 */
		    (!oldtree && newtree &&
		     same(current, newtree)) || /* 6 and 7 */
		    (oldtree && newtree &&
		     same(oldtree, newtree)) || /* 14 and 15 */
		    (oldtree && newtree &&
		     !same(oldtree, newtree) && /* 18 and 19 */
		     same(current, newtree))) {
			*dst++ = current;
			return 1;
		}
		else if (oldtree && !newtree && same(current, oldtree)) {
			/* 10 or 11 */
			return deleted_entry(oldtree, current, dst);
		}
		else if (oldtree && newtree &&
			 same(current, oldtree) && !same(current, newtree)) {
			/* 20 or 21 */
			return merged_entry(newtree, current, dst);
		}
		else {
			/* all other failures */
			reject_merge(current);
		}
	}
	if (newtree)
		return merged_entry(newtree, current, dst);
	else
		return deleted_entry(oldtree, current, dst);
}
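/*
 * The numbers in the comments above (4 and 5, 6 and 7, ...) are the
 * case numbers of the two-way merge table that goes with the "carry
 * forward" rule in Documentation/git-read-tree.txt; each case pins
 * down what the result must be for one combination of index, old tree
 * and new tree contents.
 */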
/*
 * Two-way merge emulated with three-way merge.
 *
 * This treats "read-tree -m H M" by transforming it internally
 * into "read-tree -m H I+H M", where I+H is a tree that would
 * contain the contents of the current index file, overlaid on
 * top of H.  Unlike the traditional two-way merge, this leaves
 * the stages in the resulting index file and lets the user resolve
 * the merge conflicts using standard tools for three-way merge.
 *
 * This function is just to set up such an arrangement, and the
 * actual merge uses the threeway_merge() function.
 */
static void setup_emu23(void)
{
	/* stage0 contains I, stage1 H, stage2 M.
	 * move stage2 to stage3, and create stage2 entries
	 * by scanning stage0 and stage1 entries.
	 */
	int i, namelen, size;
	struct cache_entry *ce, *stage2;

	for (i = 0; i < active_nr; i++) {
		ce = active_cache[i];
		if (ce_stage(ce) != 2)
			continue;
		/* hoist them up to stage 3 */
		namelen = ce_namelen(ce);
		ce->ce_flags = create_ce_flags(namelen, 3);
	}

	for (i = 0; i < active_nr; i++) {
		ce = active_cache[i];
		if (ce_stage(ce) > 1)
			continue;
		namelen = ce_namelen(ce);
		size = cache_entry_size(namelen);
		stage2 = xmalloc(size);
		memcpy(stage2, ce, size);
		stage2->ce_flags = create_ce_flags(namelen, 2);
		if (add_cache_entry(stage2, ADD_CACHE_OK_TO_ADD) < 0)
			die("cannot merge index and our head tree");

		/* We are done with this name, so skip to next name */
		while (i < active_nr &&
		       ce_namelen(active_cache[i]) == namelen &&
		       !memcmp(active_cache[i]->name, ce->name, namelen))
			i++;
		i--; /* compensate for the loop control */
	}
}
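/*
 * Concretely, for a path that exists everywhere, the entries go from
 *
 *	stage0 = I (index), stage1 = H, stage2 = M
 *
 * before setup_emu23() to
 *
 *	stage0 = I, stage1 = H, stage2 = I overlaid on H, stage3 = M
 *
 * afterwards.  The new stage2 entry is a copy of the stage0 entry when
 * the index has one (stage0 sorts before stage1 for the same name, and
 * the skip loop above stops after the first copy), and a copy of the
 * stage1 entry otherwise.  This is the "I+H" tree the comment above
 * describes, in the shape threeway_merge() expects.
 */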
/*
 * One-way merge.
 *
 * The rule is:
 * - take the stat information from stage0, take the data from stage1
 */
static int oneway_merge(struct cache_entry **src, struct cache_entry **dst,
			struct cache_entry **next, int tail)
{
	struct cache_entry *old = src[0];
	struct cache_entry *a = src[1];

	if (src[2] || src[3])
		return -1;

	if (!a)
		return 0;
	if (old && same(old, a)) {
		*dst++ = old;
		return 1;
	}
	return merged_entry(a, NULL, dst);
}
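/*
 * This is what plain "git-read-tree -m <tree>" with a single tree ends
 * up using (merge_function[1] in main() below): the index is reset to
 * the tree's contents, but an entry whose mode and blob already match
 * keeps its cached stat information and therefore does not show up as
 * modified afterwards.
 */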
static void check_updates(struct cache_entry **src, int nr)
{
	static struct checkout state = {
		.base_dir = "",
		.force = 1,
		.quiet = 1,
		.refresh_cache = 1,
	};
	unsigned short mask = htons(CE_UPDATE);
	while (nr--) {
		struct cache_entry *ce = *src++;
		if (!ce->ce_mode) {
			if (update)
				unlink(ce->name);
			continue;
		}
		if (ce->ce_flags & mask) {
			ce->ce_flags &= ~mask;
			if (update)
				checkout_entry(ce, &state);
		}
	}
}
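/*
 * Nothing touches the working tree unless "-u" was given: entries whose
 * mode was zeroed by deleted_entry() are unlinked, and entries that the
 * merge marked with CE_UPDATE are checked out over the old contents.
 */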
typedef int (*merge_fn_t)(struct cache_entry **, struct cache_entry **, struct cache_entry **, int);
static void merge_cache(struct cache_entry **src, int nr, merge_fn_t fn)
{
	struct cache_entry **dst = src;
	int tail = nr;

	while (nr) {
		int entries;
		struct cache_entry *name, *ce, *stages[4] = { NULL, };

		name = ce = *src;
		for (;;) {
			int stage = ce_stage(ce);
			stages[stage] = ce;
			ce = *++src;
			active_nr--;
			if (!--nr)
				break;
			if (!path_matches(ce, name))
				break;
		}

		entries = fn(stages, dst, src, tail);
		if (entries < 0)
			reject_merge(name);
		dst += entries;
		active_nr += entries;
	}
	check_updates(active_cache, active_nr);
}
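/*
 * merge_cache() walks the name-sorted active cache, collects all the
 * entries sharing one path into stages[0..3], and hands them to the
 * per-path merge function.  The function writes its results through
 * "dst" and returns how many entries it emitted, which is how the
 * array gets compacted in place; "src" and "tail" let threeway_merge()
 * look at the not-yet-processed part through causes_df_conflict().
 */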
static int read_cache_unmerged(void)
{
	int i, deleted;
	struct cache_entry **dst;

	read_cache();
	dst = active_cache;
	deleted = 0;
	for (i = 0; i < active_nr; i++) {
		struct cache_entry *ce = active_cache[i];
		if (ce_stage(ce)) {
			deleted++;
			continue;
		}
		if (deleted)
			*dst = ce;
		dst++;
	}
	active_nr -= deleted;
	return deleted;
}
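/*
 * read_cache_unmerged() reads the current index and drops every
 * unmerged (stage > 0) entry, returning how many it dropped.  The
 * callers below differ in what they do with that: "-m" and "--emu23"
 * refuse to start a merge on top of an unresolved one, while "--reset"
 * silently throws the unmerged entries away.
 */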
static const char read_tree_usage[] = "git-read-tree (<sha> | -m [-u] <sha1> [<sha2> [<sha3>]])";
static struct cache_file cache_file;
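/*
 * Typical invocations handled by main() below:
 *
 *	git-read-tree <tree-ish>                 populate the index from one tree
 *	git-read-tree -m <tree>                  one-way merge into the index
 *	git-read-tree -m [-u] <H> <M>            two-way "carry forward" merge
 *	git-read-tree -m [-u] <anc> <H> <M>      three-way merge
 *	git-read-tree --emu23 <H> <M>            two-way merge done with the 3-way code
 *	git-read-tree --reset [-u] <tree>        like -m, but drop unmerged entries first
 *
 * "-u" only makes sense together with one of the merge forms.
 */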
int main(int argc, char **argv)
{
	int i, newfd, merge, reset, emu23;
	unsigned char sha1[20];

	newfd = hold_index_file_for_update(&cache_file, get_index_file());
	if (newfd < 0)
		die("unable to create new cachefile");

	merge = 0;
	reset = 0;
	emu23 = 0;
	for (i = 1; i < argc; i++) {
		const char *arg = argv[i];

		/* "-u" means "update", meaning that a merge will update the working directory */
		if (!strcmp(arg, "-u")) {
			update = 1;
			continue;
		}

		/* This differs from "-m" in that we'll silently ignore unmerged entries */
		if (!strcmp(arg, "--reset")) {
			if (stage || merge || emu23)
				usage(read_tree_usage);
			reset = 1;
			merge = 1;
			stage = 1;
			read_cache_unmerged();
			continue;
		}

		/* "-m" stands for "merge", meaning we start in stage 1 */
		if (!strcmp(arg, "-m")) {
			if (stage || merge || emu23)
				usage(read_tree_usage);
			if (read_cache_unmerged())
				die("you need to resolve your current index first");
			stage = 1;
			merge = 1;
			continue;
		}

		/* "--emu23" uses 3-way merge logic to perform fast-forward */
		if (!strcmp(arg, "--emu23")) {
			if (stage || merge || emu23)
				usage(read_tree_usage);
			if (read_cache_unmerged())
				die("you need to resolve your current index first");
			merge = emu23 = stage = 1;
			continue;
		}

		if (get_sha1(arg, sha1) < 0)
			usage(read_tree_usage);
		if (stage > 3)
			usage(read_tree_usage);
		if (unpack_tree(sha1) < 0)
			die("failed to unpack tree object %s", arg);
		stage++;
	}
	if (update && !merge)
		usage(read_tree_usage);
	if (merge) {
		static const merge_fn_t merge_function[] = {
			[1] = oneway_merge,
			[2] = twoway_merge,
			[3] = threeway_merge,
		};
		merge_fn_t fn;

		if (stage < 2 || stage > 4)
			die("just how do you expect me to merge %d trees?", stage-1);
		if (emu23 && stage != 3)
			die("--emu23 takes only two trees");
		fn = merge_function[stage-1];
		if (stage == 3 && emu23) {
			setup_emu23();
			fn = merge_function[3];
		}
		merge_cache(active_cache, active_nr, fn);
	}
	if (write_cache(newfd, active_cache, active_nr) ||
	    commit_index_file(&cache_file))
		die("unable to write new index file");
	return 0;
}