merge-ort.c

   1 /*
   2  * "Ostensibly Recursive's Twin" merge strategy, or "ort" for short.  Meant
   3  * as a drop-in replacement for the "recursive" merge strategy, allowing one
   4  * to replace
   5  *
   6  *   git merge [-s recursive]
   7  *
   8  * with
   9  *
  10  *   git merge -s ort
  11  *
  12  * Note: git's parser allows the space between '-s' and its argument to be
  13  * missing.  (Should I have backronymed "ham", "alsa", "kip", "nap", "alvo",
  14  * "cale", "peedy", or "ins" instead of "ort"?)
  15  */
  16
  17 #include "cache.h"
  18 #include "merge-ort.h"
  19
  20 #include "alloc.h"
  21 #include "blob.h"
  22 #include "cache-tree.h"
  23 #include "commit.h"
  24 #include "commit-reach.h"
  25 #include "diff.h"
  26 #include "diffcore.h"
  27 #include "dir.h"
  28 #include "object-store.h"
  29 #include "strmap.h"
  30 #include "tree.h"
  31 #include "unpack-trees.h"
  32 #include "xdiff-interface.h"
  33
/*
 * We have many arrays of size 3.  Whenever we have such an array, the
 * indices refer to one of the sides of the three-way merge.  This is so
 * pervasive that the constants 0, 1, and 2 are used in many places in the
 * code (especially in arithmetic operations to find the other side's index
 * or to compute a relevant mask), but sometimes these enum names are used
 * to aid code clarity.
 *
 * See also 'filemask' and 'dirmask' in struct conflict_info; the "ith side"
 * referred to there is one of these three sides.
 */
enum merge_side {
        MERGE_BASE = 0,  /* the common ancestor (merge base) */
        MERGE_SIDE1 = 1, /* head of side 1 */
        MERGE_SIDE2 = 2  /* head of side 2 */
};
  50
/*
 * Data relating to rename detection, embedded in struct
 * merge_options_internal as its "renames" member.
 */
struct rename_info {
        /*
         * All variables that are arrays of size 3 correspond to data tracked
         * for the sides in enum merge_side.  Index 0 is almost always unused
         * because we often only need to track information for MERGE_SIDE1 and
         * MERGE_SIDE2 (MERGE_BASE can't have rename information since renames
         * are determined relative to what changed since the MERGE_BASE).
         */

        /*
         * pairs: pairing of filenames from diffcore_rename()
         */
        struct diff_queue_struct pairs[3];

        /*
         * dirs_removed: directories removed on a given side of history.
         */
        struct strset dirs_removed[3];

        /*
         * dir_rename_count: tracking where parts of a directory were renamed to
         *
         * When files in a directory are renamed, they may not all go to the
         * same location.  Each strmap here tracks:
         *      old_dir => {new_dir => int}
         * That is, dir_rename_count[side] is a strmap to a strintmap.
         */
        struct strmap dir_rename_count[3];

        /*
         * dir_renames: computed directory renames
         *
         * This is a map of old_dir => new_dir and is derived in part from
         * dir_rename_count.
         */
        struct strmap dir_renames[3];

        /*
         * needed_limit: value needed for inexact rename detection to run
         *
         * If the current rename limit wasn't high enough for inexact
         * rename detection to run, this records the limit needed.  Otherwise,
         * this value remains 0.
         */
        int needed_limit;
};
  97
/*
 * Internal working state of a merge; the code in this file reaches it
 * through the "priv" member of struct merge_options.
 */
struct merge_options_internal {
        /*
         * paths: primary data structure in all of merge ort.
         *
         * The keys of paths:
         *   * are full relative paths from the toplevel of the repository
         *     (e.g. "drivers/firmware/raspberrypi.c").
         *   * store all relevant paths in the repo, both directories and
         *     files (e.g. drivers, drivers/firmware would also be included)
         *   * these keys serve to intern all the path strings, which allows
         *     us to do pointer comparison on directory names instead of
         *     strcmp; we just have to be careful to use the interned strings.
         *     (Technically paths_to_free may track some strings that were
         *      removed from paths.)
         *
         * The values of paths:
         *   * either a pointer to a merged_info, or a conflict_info struct
         *   * merged_info contains all relevant information for a
         *     non-conflicted entry.
         *   * conflict_info contains a merged_info, plus any additional
         *     information about a conflict such as the higher orders stages
         *     involved and the names of the paths those came from (handy
         *     once renames get involved).
         *   * a path may start "conflicted" (i.e. point to a conflict_info)
         *     and then a later step (e.g. three-way content merge) determines
         *     it can be cleanly merged, at which point it'll be marked clean
         *     and the algorithm will ignore any data outside the contained
         *     merged_info for that entry
         *   * If an entry remains conflicted, the merged_info portion of a
         *     conflict_info will later be filled with whatever version of
         *     the file should be placed in the working directory (e.g. an
         *     as-merged-as-possible variation that contains conflict markers).
         */
        struct strmap paths;

        /*
         * conflicted: a subset of keys->values from "paths"
         *
         * conflicted is basically an optimization between process_entries()
         * and record_conflicted_index_entries(); the latter could loop over
         * ALL the entries in paths AGAIN and look for the ones that are
         * still conflicted, but since process_entries() has to loop over
         * all of them, it saves the ones it couldn't resolve in this strmap
         * so that record_conflicted_index_entries() can iterate just the
         * relevant entries.
         */
        struct strmap conflicted;

        /*
         * paths_to_free: additional list of strings to free
         *
         * If keys are removed from "paths", they are added to paths_to_free
         * to ensure they are later freed.  We avoid free'ing immediately since
         * other places (e.g. conflict_info.pathnames[]) may still be
         * referencing these paths.
         */
        struct string_list paths_to_free;

        /*
         * output: special messages and conflict notices for various paths
         *
         * This is a map of pathnames (a subset of the keys in "paths" above)
         * to strbufs.  It gathers various warning/conflict/notice messages
         * for later processing.
         */
        struct strmap output;

        /*
         * renames: various data relating to rename detection
         */
        struct rename_info renames;

        /*
         * current_dir_name, toplevel_dir: temporary vars
         *
         * These are used in collect_merge_info_callback(), and will set the
         * various merged_info.directory_name for the various paths we get;
         * see documentation for that variable and the requirements placed on
         * that field.
         */
        const char *current_dir_name;
        const char *toplevel_dir;

        /* call_depth: recursion level counter for merging merge bases */
        int call_depth;
};
 184
/*
 * One version of a path: its object id together with its mode.  Used both
 * for a merge result (merged_info.result) and for the per-side stages of a
 * conflicted path (conflict_info.stages[]).
 */
struct version_info {
        struct object_id oid;
        unsigned short mode;
};
 189
/*
 * Per-path merge result.  This is the value stored in
 * merge_options_internal.paths for a cleanly merged path, and it is also the
 * first member of struct conflict_info for paths that are not (yet) clean.
 */
struct merged_info {
        /* if is_null, ignore result.  otherwise result has oid & mode */
        struct version_info result;
        unsigned is_null:1;

        /*
         * clean: whether the path in question is cleanly merged.
         *
         * see conflict_info.merged for more details.
         */
        unsigned clean:1;

        /*
         * basename_offset: offset of basename of path.
         *
         * perf optimization to avoid recomputing offset of final '/'
         * character in pathname (0 if no '/' in pathname).
         */
        size_t basename_offset;

        /*
         * directory_name: containing directory name.
         *
         * Note that we assume directory_name is constructed such that
         *    strcmp(dir1_name, dir2_name) == 0 iff dir1_name == dir2_name,
         * i.e. string equality is equivalent to pointer equality.  For this
         * to hold, we have to be careful setting directory_name.
         */
        const char *directory_name;
};
 220
/*
 * Per-path record for a path that is not (yet) cleanly merged.  It begins
 * with a struct merged_info so that a pointer to either kind of record can
 * be inspected through merged_info first; see the warning on "merged".
 */
struct conflict_info {
        /*
         * merged: the version of the path that will be written to working tree
         *
         * WARNING: It is critical to check merged.clean and ensure it is 0
         * before reading any conflict_info fields outside of merged.
         * Allocated merged_info structs will always have clean set to 1.
         * Allocated conflict_info structs will have merged.clean set to 0
         * initially.  The merged.clean field is how we know if it is safe
         * to access other parts of conflict_info besides merged; if a
         * conflict_info's merged.clean is changed to 1, the rest of the
         * algorithm is not allowed to look at anything outside of the
         * merged member anymore.
         */
        struct merged_info merged;

        /* oids & modes from each of the three trees for this path */
        struct version_info stages[3];

        /* pathnames for each stage; may differ due to rename detection */
        const char *pathnames[3];

        /* Whether this path is/was involved in a directory/file conflict */
        unsigned df_conflict:1;

        /*
         * Whether this path is/was involved in a non-content conflict other
         * than a directory/file conflict (e.g. rename/rename, rename/delete,
         * file location based on possible directory rename).
         */
        unsigned path_conflict:1;

        /*
         * For filemask and dirmask, the ith bit corresponds to whether the
         * ith entry is a file (filemask) or a directory (dirmask).  Thus,
         * filemask & dirmask is always zero, and filemask | dirmask is at
         * most 7 but can be less when a path does not appear as either a
         * file or a directory on at least one side of history.
         *
         * Note that these masks are related to enum merge_side, as the ith
         * entry corresponds to side i.
         *
         * These values come from a traverse_trees() call; more info may be
         * found looking at tree-walk.h's struct traverse_info,
         * particularly the documentation above the "fn" member (note that
         * filemask = mask & ~dirmask from that documentation).
         */
        unsigned filemask:3;
        unsigned dirmask:3;

        /*
         * Optimization to track which stages match, to avoid the need to
         * recompute it in multiple steps. Either 0 or at least 2 bits are
         * set; if at least 2 bits are set, their corresponding stages match.
         */
        unsigned match_mask:3;
};
 278
 279 /*** Function Grouping: various utility functions ***/
 280
 281 /*
 282  * For the next three macros, see warning for conflict_info.merged.
 283  *
 284  * In each of the below, mi is a struct merged_info*, and ci was defined
 285  * as a struct conflict_info* (but we need to verify ci isn't actually
 286  * pointed at a struct merged_info*).
 287  *
 288  * INITIALIZE_CI: Assign ci to mi but only if it's safe; set to NULL otherwise.
 289  * VERIFY_CI: Ensure that something we assigned to a conflict_info* is one.
 290  * ASSIGN_AND_VERIFY_CI: Similar to VERIFY_CI but do assignment first.
 291  */
 292 #define INITIALIZE_CI(ci, mi) do {                                           \
 293         (ci) = (!(mi) || (mi)->clean) ? NULL : (struct conflict_info *)(mi); \
 294 } while (0)
 295 #define VERIFY_CI(ci) assert(ci && !ci->merged.clean);
 296 #define ASSIGN_AND_VERIFY_CI(ci, mi) do {    \
 297         (ci) = (struct conflict_info *)(mi);  \
 298         assert((ci) && !(mi)->clean);        \
 299 } while (0)
 300
 301 static void free_strmap_strings(struct strmap *map)
 302 {
 303         struct hashmap_iter iter;
 304         struct strmap_entry *entry;
 305
 306         strmap_for_each_entry(map, &iter, entry) {
 307                 free((char*)entry->key);
 308         }
 309 }
 310
 311 static void clear_or_reinit_internal_opts(struct merge_options_internal *opti,
 312                                           int reinitialize)
 313 {
 314         struct rename_info *renames = &opti->renames;
 315         int i;
 316         void (*strmap_func)(struct strmap *, int) =
 317                 reinitialize ? strmap_partial_clear : strmap_clear;
 318         void (*strset_func)(struct strset *) =
 319                 reinitialize ? strset_partial_clear : strset_clear;
 320
 321         /*
 322          * We marked opti->paths with strdup_strings = 0, so that we
 323          * wouldn't have to make another copy of the fullpath created by
 324          * make_traverse_path from setup_path_info().  But, now that we've
 325          * used it and have no other references to these strings, it is time
 326          * to deallocate them.
 327          */
 328         free_strmap_strings(&opti->paths);
 329         strmap_func(&opti->paths, 1);
 330
 331         /*
 332          * All keys and values in opti->conflicted are a subset of those in
 333          * opti->paths.  We don't want to deallocate anything twice, so we
 334          * don't free the keys and we pass 0 for free_values.
 335          */
 336         strmap_func(&opti->conflicted, 0);
 337
 338         /*
 339          * opti->paths_to_free is similar to opti->paths; we created it with
 340          * strdup_strings = 0 to avoid making _another_ copy of the fullpath
 341          * but now that we've used it and have no other references to these
 342          * strings, it is time to deallocate them.  We do so by temporarily
 343          * setting strdup_strings to 1.
 344          */
 345         opti->paths_to_free.strdup_strings = 1;
 346         string_list_clear(&opti->paths_to_free, 0);
 347         opti->paths_to_free.strdup_strings = 0;
 348
 349         /* Free memory used by various renames maps */
 350         for (i = MERGE_SIDE1; i <= MERGE_SIDE2; ++i) {
 351                 struct hashmap_iter iter;
 352                 struct strmap_entry *entry;
 353
 354                 strset_func(&renames->dirs_removed[i]);
 355
 356                 strmap_for_each_entry(&renames->dir_rename_count[i],
 357                                       &iter, entry) {
 358                         struct strintmap *counts = entry->value;
 359                         strintmap_clear(counts);
 360                 }
 361                 strmap_func(&renames->dir_rename_count[i], 1);
 362
 363                 strmap_func(&renames->dir_renames[i], 0);
 364         }
 365
 366         if (!reinitialize) {
 367                 struct hashmap_iter iter;
 368                 struct strmap_entry *e;
 369
 370                 /* Release and free each strbuf found in output */
 371                 strmap_for_each_entry(&opti->output, &iter, e) {
 372                         struct strbuf *sb = e->value;
 373                         strbuf_release(sb);
 374                         /*
 375                          * While strictly speaking we don't need to free(sb)
 376                          * here because we could pass free_values=1 when
 377                          * calling strmap_clear() on opti->output, that would
 378                          * require strmap_clear to do another
 379                          * strmap_for_each_entry() loop, so we just free it
 380                          * while we're iterating anyway.
 381                          */
 382                         free(sb);
 383                 }
 384                 strmap_clear(&opti->output, 0);
 385         }
 386 }
 387
 388 static int err(struct merge_options *opt, const char *err, ...)
 389 {
 390         va_list params;
 391         struct strbuf sb = STRBUF_INIT;
 392
 393         strbuf_addstr(&sb, "error: ");
 394         va_start(params, err);
 395         strbuf_vaddf(&sb, err, params);
 396         va_end(params);
 397
 398         error("%s", sb.buf);
 399         strbuf_release(&sb);
 400
 401         return -1;
 402 }
 403
 404 __attribute__((format (printf, 4, 5)))
 405 static void path_msg(struct merge_options *opt,
 406                      const char *path,
 407                      int omittable_hint, /* skippable under --remerge-diff */
 408                      const char *fmt, ...)
 409 {
 410         va_list ap;
 411         struct strbuf *sb = strmap_get(&opt->priv->output, path);
 412         if (!sb) {
 413                 sb = xmalloc(sizeof(*sb));
 414                 strbuf_init(sb, 0);
 415                 strmap_put(&opt->priv->output, path, sb);
 416         }
 417
 418         va_start(ap, fmt);
 419         strbuf_vaddf(sb, fmt, ap);
 420         va_end(ap);
 421
 422         strbuf_addch(sb, '\n');
 423 }
 424
 425 /*** Function Grouping: functions related to collect_merge_info() ***/
 426
 427 static void setup_path_info(struct merge_options *opt,
 428                             struct string_list_item *result,
 429                             const char *current_dir_name,
 430                             int current_dir_name_len,
 431                             char *fullpath, /* we'll take over ownership */
 432                             struct name_entry *names,
 433                             struct name_entry *merged_version,
 434                             unsigned is_null,     /* boolean */
 435                             unsigned df_conflict, /* boolean */
 436                             unsigned filemask,
 437                             unsigned dirmask,
 438                             int resolved          /* boolean */)
 439 {
 440         /* result->util is void*, so mi is a convenience typed variable */
 441         struct merged_info *mi;
 442
 443         assert(!is_null || resolved);
 444         assert(!df_conflict || !resolved); /* df_conflict implies !resolved */
 445         assert(resolved == (merged_version != NULL));
 446
 447         mi = xcalloc(1, resolved ? sizeof(struct merged_info) :
 448                                    sizeof(struct conflict_info));
 449         mi->directory_name = current_dir_name;
 450         mi->basename_offset = current_dir_name_len;
 451         mi->clean = !!resolved;
 452         if (resolved) {
 453                 mi->result.mode = merged_version->mode;
 454                 oidcpy(&mi->result.oid, &merged_version->oid);
 455                 mi->is_null = !!is_null;
 456         } else {
 457                 int i;
 458                 struct conflict_info *ci;
 459
 460                 ASSIGN_AND_VERIFY_CI(ci, mi);
 461                 for (i = MERGE_BASE; i <= MERGE_SIDE2; i++) {
 462                         ci->pathnames[i] = fullpath;
 463                         ci->stages[i].mode = names[i].mode;
 464                         oidcpy(&ci->stages[i].oid, &names[i].oid);
 465                 }
 466                 ci->filemask = filemask;
 467                 ci->dirmask = dirmask;
 468                 ci->df_conflict = !!df_conflict;
 469                 if (dirmask)
 470                         /*
 471                          * Assume is_null for now, but if we have entries
 472                          * under the directory then when it is complete in
 473                          * write_completed_directory() it'll update this.
 474                          * Also, for D/F conflicts, we have to handle the
 475                          * directory first, then clear this bit and process
 476                          * the file to see how it is handled -- that occurs
 477                          * near the top of process_entry().
 478                          */
 479                         mi->is_null = 1;
 480         }
 481         strmap_put(&opt->priv->paths, fullpath, mi);
 482         result->string = fullpath;
 483         result->util = mi;
 484 }
 485
 486 static void collect_rename_info(struct merge_options *opt,
 487                                 struct name_entry *names,
 488                                 const char *dirname,
 489                                 const char *fullname,
 490                                 unsigned filemask,
 491                                 unsigned dirmask,
 492                                 unsigned match_mask)
 493 {
 494         struct rename_info *renames = &opt->priv->renames;
 495
 496         /* Update dirs_removed, as needed */
 497         if (dirmask == 1 || dirmask == 3 || dirmask == 5) {
 498                 /* absent_mask = 0x07 - dirmask; sides = absent_mask/2 */
 499                 unsigned sides = (0x07 - dirmask)/2;
 500                 if (sides & 1)
 501                         strset_add(&renames->dirs_removed[1], fullname);
 502                 if (sides & 2)
 503                         strset_add(&renames->dirs_removed[2], fullname);
 504         }
 505 }
 506
/*
 * Callback installed as traverse_info.fn by collect_merge_info().
 *
 * For the path described by names[0..2] it builds the full path, records a
 * merged_info (all three sides identical) or a conflict_info (otherwise) in
 * opt->priv->paths via setup_path_info(), and, when the path is a directory
 * on at least one side, recurses into it with another traverse_trees() call.
 *
 * Returns mask on success and -1 when the recursive traversal reports a
 * negative value.
 */
static int collect_merge_info_callback(int n,
                                       unsigned long mask,
                                       unsigned long dirmask,
                                       struct name_entry *names,
                                       struct traverse_info *info)
{
        /*
         * n is 3.  Always.
         * common ancestor (mbase) has mask 1, and stored in index 0 of names
         * head of side 1  (side1) has mask 2, and stored in index 1 of names
         * head of side 2  (side2) has mask 4, and stored in index 2 of names
         */
        struct merge_options *opt = info->data;
        struct merge_options_internal *opti = opt->priv;
        struct string_list_item pi;  /* Path Info */
        struct conflict_info *ci; /* typed alias to pi.util (which is void*) */
        struct name_entry *p;
        size_t len;
        char *fullpath;
        const char *dirname = opti->current_dir_name;
        unsigned filemask = mask & ~dirmask;
        unsigned match_mask = 0; /* will be updated below */
        unsigned mbase_null = !(mask & 1);
        unsigned side1_null = !(mask & 2);
        unsigned side2_null = !(mask & 4);
        unsigned side1_matches_mbase = (!side1_null && !mbase_null &&
                                        names[0].mode == names[1].mode &&
                                        oideq(&names[0].oid, &names[1].oid));
        unsigned side2_matches_mbase = (!side2_null && !mbase_null &&
                                        names[0].mode == names[2].mode &&
                                        oideq(&names[0].oid, &names[2].oid));
        unsigned sides_match = (!side1_null && !side2_null &&
                                names[1].mode == names[2].mode &&
                                oideq(&names[1].oid, &names[2].oid));

        /*
         * Note: When a path is a file on one side of history and a directory
         * in another, we have a directory/file conflict.  In such cases, if
         * the conflict doesn't resolve from renames and deletions, then we
         * always leave directories where they are and move files out of the
         * way.  Thus, while struct conflict_info has a df_conflict field to
         * track such conflicts, we ignore that field for any directories at
         * a path and only pay attention to it for files at the given path.
         * The fact that we leave directories where they are also means that
         * we do not need to worry about getting additional df_conflict
         * information propagated from parent directories down to children
         * (unlike, say traverse_trees_recursive() in unpack-trees.c, which
         * sets a newinfo.df_conflicts field specifically to propagate it).
         */
        unsigned df_conflict = (filemask != 0) && (dirmask != 0);

        /* n = 3 is a fundamental assumption. */
        if (n != 3)
                BUG("Called collect_merge_info_callback wrong");

        /*
         * A bunch of sanity checks verifying that traverse_trees() calls
         * us the way I expect.  Could just remove these at some point,
         * though maybe they are helpful to future code readers.
         */
        assert(mbase_null == is_null_oid(&names[0].oid));
        assert(side1_null == is_null_oid(&names[1].oid));
        assert(side2_null == is_null_oid(&names[2].oid));
        assert(!mbase_null || !side1_null || !side2_null);
        assert(mask > 0 && mask < 8);

        /*
         * Determine match_mask: bit i is set for every side i that is part
         * of a group of matching sides (7 = all three, 3 = base and side1,
         * 5 = base and side2, 6 = side1 and side2, 0 = nothing matches).
         */
        if (side1_matches_mbase)
                match_mask = (side2_matches_mbase ? 7 : 3);
        else if (side2_matches_mbase)
                match_mask = 5;
        else if (sides_match)
                match_mask = 6;

        /*
         * Get the name of the relevant filepath, which we'll pass to
         * setup_path_info() for tracking.
         */
        p = names;
        /* use the first entry with a nonzero mode (presumably a present side) */
        while (!p->mode)
                p++;
        len = traverse_path_len(info, p->pathlen);

        /* +1 in both of the following lines to include the NUL byte */
        fullpath = xmalloc(len + 1);
        make_traverse_path(fullpath, len + 1, info, p->path, p->pathlen);

        /*
         * If mbase, side1, and side2 all match, we can resolve early.  Even
         * if these are trees, there will be no renames or anything
         * underneath.
         */
        if (side1_matches_mbase && side2_matches_mbase) {
                /* mbase, side1, & side2 all match; use mbase as resolution */
                setup_path_info(opt, &pi, dirname, info->pathlen, fullpath,
                                names, names+0, mbase_null, 0,
                                filemask, dirmask, 1);
                return mask;
        }

        /*
         * Gather additional information used in rename detection.
         */
        collect_rename_info(opt, names, dirname, fullpath,
                            filemask, dirmask, match_mask);

        /*
         * Record information about the path so we can resolve later in
         * process_entries.
         */
        setup_path_info(opt, &pi, dirname, info->pathlen, fullpath,
                        names, NULL, 0, df_conflict, filemask, dirmask, 0);

        ci = pi.util;
        VERIFY_CI(ci);
        ci->match_mask = match_mask;

        /* If dirmask, recurse into subdirectories */
        if (dirmask) {
                struct traverse_info newinfo;
                struct tree_desc t[3];
                void *buf[3] = {NULL, NULL, NULL};
                const char *original_dir_name;
                int i, ret;

                /* keep match bits only for the sides where this is a file */
                ci->match_mask &= filemask;
                newinfo = *info;
                newinfo.prev = info;
                newinfo.name = p->path;
                newinfo.namelen = p->pathlen;
                newinfo.pathlen = st_add3(newinfo.pathlen, p->pathlen, 1);
                /*
                 * If this directory we are about to recurse into cared about
                 * its parent directory (the current directory) having a D/F
                 * conflict, then we'd propagate the masks in this way:
                 *    newinfo.df_conflicts |= (mask & ~dirmask);
                 * But we don't worry about propagating D/F conflicts.  (See
                 * comment near setting of local df_conflict variable near
                 * the beginning of this function).
                 */

                for (i = MERGE_BASE; i <= MERGE_SIDE2; i++) {
                        /*
                         * When a side is identical to one already set up,
                         * copy that descriptor rather than calling
                         * fill_tree_descriptor() again; buf[i] then stays
                         * NULL for that side.
                         */
                        if (i == 1 && side1_matches_mbase)
                                t[1] = t[0];
                        else if (i == 2 && side2_matches_mbase)
                                t[2] = t[0];
                        else if (i == 2 && sides_match)
                                t[2] = t[1];
                        else {
                                const struct object_id *oid = NULL;
                                /* bit 0 of dirmask now describes side i */
                                if (dirmask & 1)
                                        oid = &names[i].oid;
                                buf[i] = fill_tree_descriptor(opt->repo,
                                                              t + i, oid);
                        }
                        /* shift so that bit 0 describes the next side */
                        dirmask >>= 1;
                }

                /* pi.string is the full path of the directory being entered */
                original_dir_name = opti->current_dir_name;
                opti->current_dir_name = pi.string;
                ret = traverse_trees(NULL, 3, t, &newinfo);
                opti->current_dir_name = original_dir_name;

                for (i = MERGE_BASE; i <= MERGE_SIDE2; i++)
                        free(buf[i]);

                if (ret < 0)
                        return -1;
        }

        return mask;
}
 679
 680 static int collect_merge_info(struct merge_options *opt,
 681                               struct tree *merge_base,
 682                               struct tree *side1,
 683                               struct tree *side2)
 684 {
 685         int ret;
 686         struct tree_desc t[3];
 687         struct traverse_info info;
 688
 689         opt->priv->toplevel_dir = "";
 690         opt->priv->current_dir_name = opt->priv->toplevel_dir;
 691         setup_traverse_info(&info, opt->priv->toplevel_dir);
 692         info.fn = collect_merge_info_callback;
 693         info.data = opt;
 694         info.show_all_errors = 1;
 695
 696         parse_tree(merge_base);
 697         parse_tree(side1);
 698         parse_tree(side2);
 699         init_tree_desc(t + 0, merge_base->buffer, merge_base->size);
 700         init_tree_desc(t + 1, side1->buffer, side1->size);
 701         init_tree_desc(t + 2, side2->buffer, side2->size);
 702
 703         ret = traverse_trees(NULL, 3, t, &info);
 704
 705         return ret;
 706 }
 707
 708 /*** Function Grouping: functions related to threeway content merges ***/
 709
/*
 * Placeholder for the three-way content merge of a single path.
 *
 * Not implemented yet: every call ends in die(), and none of the
 * parameters are looked at.
 */
static int handle_content_merge(struct merge_options *opt,
                                const char *path,
                                const struct version_info *o,
                                const struct version_info *a,
                                const struct version_info *b,
                                const char *pathnames[3],
                                const int extra_marker_size,
                                struct version_info *result)
{
        die("Not yet implemented");
}
 721
 722 /*** Function Grouping: functions related to detect_and_process_renames(), ***
 723  *** which are split into directory and regular rename detection sections. ***/
 724
 725 /*** Function Grouping: functions related to directory rename detection ***/
 726
/*
 * Bookkeeping for one destination path that directory renames would move
 * files to.  Instances are the values of the 'collisions' strmap, keyed by
 * the would-be new path (see compute_collisions()).
 */
struct collision_info {
        /* Paths (pair->two->path) that would all be relocated to this destination */
        struct string_list source_files;
        /* Set once a conflict message for this destination has been emitted */
        unsigned reported_already:1;
};
 731
 732 /*
 733  * Return a new string that replaces the beginning portion (which matches
 734  * rename_info->key), with rename_info->util.new_dir.  In perl-speak:
 735  *   new_path_name = (old_path =~ s/rename_info->key/rename_info->value/);
 736  * NOTE:
 737  *   Caller must ensure that old_path starts with rename_info->key + '/'.
 738  */
 739 static char *apply_dir_rename(struct strmap_entry *rename_info,
 740                               const char *old_path)
 741 {
 742         struct strbuf new_path = STRBUF_INIT;
 743         const char *old_dir = rename_info->key;
 744         const char *new_dir = rename_info->value;
 745         int oldlen, newlen, new_dir_len;
 746
 747         oldlen = strlen(old_dir);
 748         if (*new_dir == '\0')
 749                 /*
 750                  * If someone renamed/merged a subdirectory into the root
 751                  * directory (e.g. 'some/subdir' -> ''), then we want to
 752                  * avoid returning
 753                  *     '' + '/filename'
 754                  * as the rename; we need to make old_path + oldlen advance
 755                  * past the '/' character.
 756                  */
 757                 oldlen++;
 758         new_dir_len = strlen(new_dir);
 759         newlen = new_dir_len + (strlen(old_path) - oldlen) + 1;
 760         strbuf_grow(&new_path, newlen);
 761         strbuf_add(&new_path, new_dir, new_dir_len);
 762         strbuf_addstr(&new_path, &old_path[oldlen]);
 763
 764         return strbuf_detach(&new_path, NULL);
 765 }
 766
 767 static int path_in_way(struct strmap *paths, const char *path, unsigned side_mask)
 768 {
 769         struct merged_info *mi = strmap_get(paths, path);
 770         struct conflict_info *ci;
 771         if (!mi)
 772                 return 0;
 773         INITIALIZE_CI(ci, mi);
 774         return mi->clean || (side_mask & (ci->filemask | ci->dirmask));
 775 }
 776
 777 /*
 778  * See if there is a directory rename for path, and if there are any file
 779  * level conflicts on the given side for the renamed location.  If there is
 780  * a rename and there are no conflicts, return the new name.  Otherwise,
 781  * return NULL.
 782  */
 783 static char *handle_path_level_conflicts(struct merge_options *opt,
 784                                          const char *path,
 785                                          unsigned side_index,
 786                                          struct strmap_entry *rename_info,
 787                                          struct strmap *collisions)
 788 {
 789         char *new_path = NULL;
 790         struct collision_info *c_info;
 791         int clean = 1;
 792         struct strbuf collision_paths = STRBUF_INIT;
 793
 794         /*
 795          * entry has the mapping of old directory name to new directory name
 796          * that we want to apply to path.
 797          */
 798         new_path = apply_dir_rename(rename_info, path);
 799         if (!new_path)
 800                 BUG("Failed to apply directory rename!");
 801
 802         /*
 803          * The caller needs to have ensured that it has pre-populated
 804          * collisions with all paths that map to new_path.  Do a quick check
 805          * to ensure that's the case.
 806          */
 807         c_info = strmap_get(collisions, new_path);
 808         if (c_info == NULL)
 809                 BUG("c_info is NULL");
 810
 811         /*
 812          * Check for one-sided add/add/.../add conflicts, i.e.
 813          * where implicit renames from the other side doing
 814          * directory rename(s) can affect this side of history
 815          * to put multiple paths into the same location.  Warn
 816          * and bail on directory renames for such paths.
 817          */
 818         if (c_info->reported_already) {
 819                 clean = 0;
 820         } else if (path_in_way(&opt->priv->paths, new_path, 1 << side_index)) {
 821                 c_info->reported_already = 1;
 822                 strbuf_add_separated_string_list(&collision_paths, ", ",
 823                                                  &c_info->source_files);
 824                 path_msg(opt, new_path, 0,
 825                          _("CONFLICT (implicit dir rename): Existing file/dir "
 826                            "at %s in the way of implicit directory rename(s) "
 827                            "putting the following path(s) there: %s."),
 828                        new_path, collision_paths.buf);
 829                 clean = 0;
 830         } else if (c_info->source_files.nr > 1) {
 831                 c_info->reported_already = 1;
 832                 strbuf_add_separated_string_list(&collision_paths, ", ",
 833                                                  &c_info->source_files);
 834                 path_msg(opt, new_path, 0,
 835                          _("CONFLICT (implicit dir rename): Cannot map more "
 836                            "than one path to %s; implicit directory renames "
 837                            "tried to put these paths there: %s"),
 838                        new_path, collision_paths.buf);
 839                 clean = 0;
 840         }
 841
 842         /* Free memory we no longer need */
 843         strbuf_release(&collision_paths);
 844         if (!clean && new_path) {
 845                 free(new_path);
 846                 return NULL;
 847         }
 848
 849         return new_path;
 850 }
 851
 852 static void dirname_munge(char *filename)
 853 {
 854         char *slash = strrchr(filename, '/');
 855         if (!slash)
 856                 slash = filename;
 857         *slash = '\0';
 858 }
 859
 860 static void increment_count(struct strmap *dir_rename_count,
 861                             char *old_dir,
 862                             char *new_dir)
 863 {
 864         struct strintmap *counts;
 865         struct strmap_entry *e;
 866
 867         /* Get the {new_dirs -> counts} mapping using old_dir */
 868         e = strmap_get_entry(dir_rename_count, old_dir);
 869         if (e) {
 870                 counts = e->value;
 871         } else {
 872                 counts = xmalloc(sizeof(*counts));
 873                 strintmap_init_with_options(counts, 0, NULL, 1);
 874                 strmap_put(dir_rename_count, old_dir, counts);
 875         }
 876
 877         /* Increment the count for new_dir */
 878         strintmap_incr(counts, new_dir, 1);
 879 }
 880
 881 static void update_dir_rename_counts(struct strmap *dir_rename_count,
 882                                      struct strset *dirs_removed,
 883                                      const char *oldname,
 884                                      const char *newname)
 885 {
 886         char *old_dir = xstrdup(oldname);
 887         char *new_dir = xstrdup(newname);
 888         char new_dir_first_char = new_dir[0];
 889         int first_time_in_loop = 1;
 890
 891         while (1) {
 892                 dirname_munge(old_dir);
 893                 dirname_munge(new_dir);
 894
 895                 /*
 896                  * When renaming
 897                  *   "a/b/c/d/e/foo.c" -> "a/b/some/thing/else/e/foo.c"
 898                  * then this suggests that both
 899                  *   a/b/c/d/e/ => a/b/some/thing/else/e/
 900                  *   a/b/c/d/   => a/b/some/thing/else/
 901                  * so we want to increment counters for both.  We do NOT,
 902                  * however, also want to suggest that there was the following
 903                  * rename:
 904                  *   a/b/c/ => a/b/some/thing/
 905                  * so we need to quit at that point.
 906                  *
 907                  * Note the when first_time_in_loop, we only strip off the
 908                  * basename, and we don't care if that's different.
 909                  */
 910                 if (!first_time_in_loop) {
 911                         char *old_sub_dir = strchr(old_dir, '\0')+1;
 912                         char *new_sub_dir = strchr(new_dir, '\0')+1;
 913                         if (!*new_dir) {
 914                                 /*
 915                                  * Special case when renaming to root directory,
 916                                  * i.e. when new_dir == "".  In this case, we had
 917                                  * something like
 918                                  *    a/b/subdir => subdir
 919                                  * and so dirname_munge() sets things up so that
 920                                  *    old_dir = "a/b\0subdir\0"
 921                                  *    new_dir = "\0ubdir\0"
 922                                  * We didn't have a '/' to overwrite a '\0' onto
 923                                  * in new_dir, so we have to compare differently.
 924                                  */
 925                                 if (new_dir_first_char != old_sub_dir[0] ||
 926                                     strcmp(old_sub_dir+1, new_sub_dir))
 927                                         break;
 928                         } else {
 929                                 if (strcmp(old_sub_dir, new_sub_dir))
 930                                         break;
 931                         }
 932                 }
 933
 934                 if (strset_contains(dirs_removed, old_dir))
 935                         increment_count(dir_rename_count, old_dir, new_dir);
 936                 else
 937                         break;
 938
 939                 /* If we hit toplevel directory ("") for old or new dir, quit */
 940                 if (!*old_dir || !*new_dir)
 941                         break;
 942
 943                 first_time_in_loop = 0;
 944         }
 945
 946         /* Free resources we don't need anymore */
 947         free(old_dir);
 948         free(new_dir);
 949 }
 950
 951 static void compute_rename_counts(struct diff_queue_struct *pairs,
 952                                   struct strmap *dir_rename_count,
 953                                   struct strset *dirs_removed)
 954 {
 955         int i;
 956
 957         for (i = 0; i < pairs->nr; ++i) {
 958                 struct diff_filepair *pair = pairs->queue[i];
 959
 960                 /* File not part of directory rename if it wasn't renamed */
 961                 if (pair->status != 'R')
 962                         continue;
 963
 964                 /*
 965                  * Make dir_rename_count contain a map of a map:
 966                  *   old_directory -> {new_directory -> count}
 967                  * In other words, for every pair look at the directories for
 968                  * the old filename and the new filename and count how many
 969                  * times that pairing occurs.
 970                  */
 971                 update_dir_rename_counts(dir_rename_count, dirs_removed,
 972                                          pair->one->path,
 973                                          pair->two->path);
 974         }
 975 }
 976
 977 static void get_provisional_directory_renames(struct merge_options *opt,
 978                                               unsigned side,
 979                                               int *clean)
 980 {
 981         struct hashmap_iter iter;
 982         struct strmap_entry *entry;
 983         struct rename_info *renames = &opt->priv->renames;
 984
 985         compute_rename_counts(&renames->pairs[side],
 986                               &renames->dir_rename_count[side],
 987                               &renames->dirs_removed[side]);
 988         /*
 989          * Collapse
 990          *    dir_rename_count: old_directory -> {new_directory -> count}
 991          * down to
 992          *    dir_renames: old_directory -> best_new_directory
 993          * where best_new_directory is the one with the unique highest count.
 994          */
 995         strmap_for_each_entry(&renames->dir_rename_count[side], &iter, entry) {
 996                 const char *source_dir = entry->key;
 997                 struct strintmap *counts = entry->value;
 998                 struct hashmap_iter count_iter;
 999                 struct strmap_entry *count_entry;
1000                 int max = 0;
1001                 int bad_max = 0;
1002                 const char *best = NULL;
1003
1004                 strintmap_for_each_entry(counts, &count_iter, count_entry) {
1005                         const char *target_dir = count_entry->key;
1006                         intptr_t count = (intptr_t)count_entry->value;
1007
1008                         if (count == max)
1009                                 bad_max = max;
1010                         else if (count > max) {
1011                                 max = count;
1012                                 best = target_dir;
1013                         }
1014                 }
1015
1016                 if (bad_max == max) {
1017                         path_msg(opt, source_dir, 0,
1018                                _("CONFLICT (directory rename split): "
1019                                  "Unclear where to rename %s to; it was "
1020                                  "renamed to multiple other directories, with "
1021                                  "no destination getting a majority of the "
1022                                  "files."),
1023                                source_dir);
1024                         *clean = 0;
1025                 } else {
1026                         strmap_put(&renames->dir_renames[side],
1027                                    source_dir, (void*)best);
1028                 }
1029         }
1030 }
1031
1032 static void handle_directory_level_conflicts(struct merge_options *opt)
1033 {
1034         struct hashmap_iter iter;
1035         struct strmap_entry *entry;
1036         struct string_list duplicated = STRING_LIST_INIT_NODUP;
1037         struct rename_info *renames = &opt->priv->renames;
1038         struct strmap *side1_dir_renames = &renames->dir_renames[MERGE_SIDE1];
1039         struct strmap *side2_dir_renames = &renames->dir_renames[MERGE_SIDE2];
1040         int i;
1041
1042         strmap_for_each_entry(side1_dir_renames, &iter, entry) {
1043                 if (strmap_contains(side2_dir_renames, entry->key))
1044                         string_list_append(&duplicated, entry->key);
1045         }
1046
1047         for (i = 0; i < duplicated.nr; i++) {
1048                 strmap_remove(side1_dir_renames, duplicated.items[i].string, 0);
1049                 strmap_remove(side2_dir_renames, duplicated.items[i].string, 0);
1050         }
1051         string_list_clear(&duplicated, 0);
1052 }
1053
/*
 * Find the directory rename that applies to path, if any: each leading
 * directory of path is looked up in dir_renames, deepest first, and the
 * first entry found is returned.  Returns NULL when none of them is in
 * dir_renames.
 */
static struct strmap_entry *check_dir_renamed(const char *path,
                                              struct strmap *dir_renames)
{
        struct strmap_entry *match = NULL;
        char *prefix = xstrdup(path);

        while (!match) {
                char *slash = strrchr(prefix, '/');

                if (!slash)
                        break;
                /* Chop off the last component and try the shorter prefix */
                *slash = '\0';
                match = strmap_get_entry(dir_renames, prefix);
        }

        free(prefix);
        return match;
}
1070
1071 static void compute_collisions(struct strmap *collisions,
1072                                struct strmap *dir_renames,
1073                                struct diff_queue_struct *pairs)
1074 {
1075         int i;
1076
1077         strmap_init_with_options(collisions, NULL, 0);
1078         if (strmap_empty(dir_renames))
1079                 return;
1080
1081         /*
1082          * Multiple files can be mapped to the same path due to directory
1083          * renames done by the other side of history.  Since that other
1084          * side of history could have merged multiple directories into one,
1085          * if our side of history added the same file basename to each of
1086          * those directories, then all N of them would get implicitly
1087          * renamed by the directory rename detection into the same path,
1088          * and we'd get an add/add/.../add conflict, and all those adds
1089          * from *this* side of history.  This is not representable in the
1090          * index, and users aren't going to easily be able to make sense of
1091          * it.  So we need to provide a good warning about what's
1092          * happening, and fall back to no-directory-rename detection
1093          * behavior for those paths.
1094          *
1095          * See testcases 9e and all of section 5 from t6043 for examples.
1096          */
1097         for (i = 0; i < pairs->nr; ++i) {
1098                 struct strmap_entry *rename_info;
1099                 struct collision_info *collision_info;
1100                 char *new_path;
1101                 struct diff_filepair *pair = pairs->queue[i];
1102
1103                 if (pair->status != 'A' && pair->status != 'R')
1104                         continue;
1105                 rename_info = check_dir_renamed(pair->two->path, dir_renames);
1106                 if (!rename_info)
1107                         continue;
1108
1109                 new_path = apply_dir_rename(rename_info, pair->two->path);
1110                 assert(new_path);
1111                 collision_info = strmap_get(collisions, new_path);
1112                 if (collision_info) {
1113                         free(new_path);
1114                 } else {
1115                         collision_info = xcalloc(1,
1116                                                  sizeof(struct collision_info));
1117                         string_list_init(&collision_info->source_files, 0);
1118                         strmap_put(collisions, new_path, collision_info);
1119                 }
1120                 string_list_insert(&collision_info->source_files,
1121                                    pair->two->path);
1122         }
1123 }
1124
1125 static char *check_for_directory_rename(struct merge_options *opt,
1126                                         const char *path,
1127                                         unsigned side_index,
1128                                         struct strmap *dir_renames,
1129                                         struct strmap *dir_rename_exclusions,
1130                                         struct strmap *collisions,
1131                                         int *clean_merge)
1132 {
1133         char *new_path = NULL;
1134         struct strmap_entry *rename_info;
1135         struct strmap_entry *otherinfo = NULL;
1136         const char *new_dir;
1137
1138         if (strmap_empty(dir_renames))
1139                 return new_path;
1140         rename_info = check_dir_renamed(path, dir_renames);
1141         if (!rename_info)
1142                 return new_path;
1143         /* old_dir = rename_info->key; */
1144         new_dir = rename_info->value;
1145
1146         /*
1147          * This next part is a little weird.  We do not want to do an
1148          * implicit rename into a directory we renamed on our side, because
1149          * that will result in a spurious rename/rename(1to2) conflict.  An
1150          * example:
1151          *   Base commit: dumbdir/afile, otherdir/bfile
1152          *   Side 1:      smrtdir/afile, otherdir/bfile
1153          *   Side 2:      dumbdir/afile, dumbdir/bfile
1154          * Here, while working on Side 1, we could notice that otherdir was
1155          * renamed/merged to dumbdir, and change the diff_filepair for
1156          * otherdir/bfile into a rename into dumbdir/bfile.  However, Side
1157          * 2 will notice the rename from dumbdir to smrtdir, and do the
1158          * transitive rename to move it from dumbdir/bfile to
1159          * smrtdir/bfile.  That gives us bfile in dumbdir vs being in
1160          * smrtdir, a rename/rename(1to2) conflict.  We really just want
1161          * the file to end up in smrtdir.  And the way to achieve that is
1162          * to not let Side1 do the rename to dumbdir, since we know that is
1163          * the source of one of our directory renames.
1164          *
1165          * That's why otherinfo and dir_rename_exclusions is here.
1166          *
1167          * As it turns out, this also prevents N-way transient rename
1168          * confusion; See testcases 9c and 9d of t6043.
1169          */
1170         otherinfo = strmap_get_entry(dir_rename_exclusions, new_dir);
1171         if (otherinfo) {
1172                 path_msg(opt, rename_info->key, 1,
1173                          _("WARNING: Avoiding applying %s -> %s rename "
1174                            "to %s, because %s itself was renamed."),
1175                          rename_info->key, new_dir, path, new_dir);
1176                 return NULL;
1177         }
1178
1179         new_path = handle_path_level_conflicts(opt, path, side_index,
1180                                                rename_info, collisions);
1181         *clean_merge &= (new_path != NULL);
1182
1183         return new_path;
1184 }
1185
1186 static void apply_directory_rename_modifications(struct merge_options *opt,
1187                                                  struct diff_filepair *pair,
1188                                                  char *new_path)
1189 {
1190         /*
1191          * The basic idea is to get the conflict_info from opt->priv->paths
1192          * at old path, and insert it into new_path; basically just this:
1193          *     ci = strmap_get(&opt->priv->paths, old_path);
1194          *     strmap_remove(&opt->priv->paths, old_path, 0);
1195          *     strmap_put(&opt->priv->paths, new_path, ci);
1196          * However, there are some factors complicating this:
1197          *     - opt->priv->paths may already have an entry at new_path
1198          *     - Each ci tracks its containing directory, so we need to
1199          *       update that
1200          *     - If another ci has the same containing directory, then
1201          *       the two char*'s MUST point to the same location.  See the
1202          *       comment in struct merged_info.  strcmp equality is not
1203          *       enough; we need pointer equality.
1204          *     - opt->priv->paths must hold the parent directories of any
1205          *       entries that are added.  So, if this directory rename
1206          *       causes entirely new directories, we must recursively add
1207          *       parent directories.
1208          *     - For each parent directory added to opt->priv->paths, we
1209          *       also need to get its parent directory stored in its
1210          *       conflict_info->merged.directory_name with all the same
1211          *       requirements about pointer equality.
1212          */
1213         struct string_list dirs_to_insert = STRING_LIST_INIT_NODUP;
1214         struct conflict_info *ci, *new_ci;
1215         struct strmap_entry *entry;
1216         const char *branch_with_new_path, *branch_with_dir_rename;
1217         const char *old_path = pair->two->path;
1218         const char *parent_name;
1219         const char *cur_path;
1220         int i, len;
1221
1222         entry = strmap_get_entry(&opt->priv->paths, old_path);
1223         old_path = entry->key;
1224         ci = entry->value;
1225         VERIFY_CI(ci);
1226
1227         /* Find parent directories missing from opt->priv->paths */
1228         cur_path = new_path;
1229         while (1) {
1230                 /* Find the parent directory of cur_path */
1231                 char *last_slash = strrchr(cur_path, '/');
1232                 if (last_slash) {
1233                         parent_name = xstrndup(cur_path, last_slash - cur_path);
1234                 } else {
1235                         parent_name = opt->priv->toplevel_dir;
1236                         break;
1237                 }
1238
1239                 /* Look it up in opt->priv->paths */
1240                 entry = strmap_get_entry(&opt->priv->paths, parent_name);
1241                 if (entry) {
1242                         free((char*)parent_name);
1243                         parent_name = entry->key; /* reuse known pointer */
1244                         break;
1245                 }
1246
1247                 /* Record this is one of the directories we need to insert */
1248                 string_list_append(&dirs_to_insert, parent_name);
1249                 cur_path = parent_name;
1250         }
1251
1252         /* Traverse dirs_to_insert and insert them into opt->priv->paths */
1253         for (i = dirs_to_insert.nr-1; i >= 0; --i) {
1254                 struct conflict_info *dir_ci;
1255                 char *cur_dir = dirs_to_insert.items[i].string;
1256
1257                 dir_ci = xcalloc(1, sizeof(*dir_ci));
1258
1259                 dir_ci->merged.directory_name = parent_name;
1260                 len = strlen(parent_name);
1261                 /* len+1 because of trailing '/' character */
1262                 dir_ci->merged.basename_offset = (len > 0 ? len+1 : len);
1263                 dir_ci->dirmask = ci->filemask;
1264                 strmap_put(&opt->priv->paths, cur_dir, dir_ci);
1265
1266                 parent_name = cur_dir;
1267         }
1268
1269         /*
1270          * We are removing old_path from opt->priv->paths.  old_path also will
1271          * eventually need to be freed, but it may still be used by e.g.
1272          * ci->pathnames.  So, store it in another string-list for now.
1273          */
1274         string_list_append(&opt->priv->paths_to_free, old_path);
1275
1276         assert(ci->filemask == 2 || ci->filemask == 4);
1277         assert(ci->dirmask == 0);
1278         strmap_remove(&opt->priv->paths, old_path, 0);
1279
1280         branch_with_new_path   = (ci->filemask == 2) ? opt->branch1 : opt->branch2;
1281         branch_with_dir_rename = (ci->filemask == 2) ? opt->branch2 : opt->branch1;
1282
1283         /* Now, finally update ci and stick it into opt->priv->paths */
1284         ci->merged.directory_name = parent_name;
1285         len = strlen(parent_name);
1286         ci->merged.basename_offset = (len > 0 ? len+1 : len);
1287         new_ci = strmap_get(&opt->priv->paths, new_path);
1288         if (!new_ci) {
1289                 /* Place ci back into opt->priv->paths, but at new_path */
1290                 strmap_put(&opt->priv->paths, new_path, ci);
1291         } else {
1292                 int index;
1293
1294                 /* A few sanity checks */
1295                 VERIFY_CI(new_ci);
1296                 assert(ci->filemask == 2 || ci->filemask == 4);
1297                 assert((new_ci->filemask & ci->filemask) == 0);
1298                 assert(!new_ci->merged.clean);
1299
1300                 /* Copy stuff from ci into new_ci */
1301                 new_ci->filemask |= ci->filemask;
1302                 if (new_ci->dirmask)
1303                         new_ci->df_conflict = 1;
1304                 index = (ci->filemask >> 1);
1305                 new_ci->pathnames[index] = ci->pathnames[index];
1306                 new_ci->stages[index].mode = ci->stages[index].mode;
1307                 oidcpy(&new_ci->stages[index].oid, &ci->stages[index].oid);
1308
1309                 free(ci);
1310                 ci = new_ci;
1311         }
1312
1313         if (opt->detect_directory_renames == MERGE_DIRECTORY_RENAMES_TRUE) {
1314                 /* Notify user of updated path */
1315                 if (pair->status == 'A')
1316                         path_msg(opt, new_path, 1,
1317                                  _("Path updated: %s added in %s inside a "
1318                                    "directory that was renamed in %s; moving "
1319                                    "it to %s."),
1320                                  old_path, branch_with_new_path,
1321                                  branch_with_dir_rename, new_path);
1322                 else
1323                         path_msg(opt, new_path, 1,
1324                                  _("Path updated: %s renamed to %s in %s, "
1325                                    "inside a directory that was renamed in %s; "
1326                                    "moving it to %s."),
1327                                  pair->one->path, old_path, branch_with_new_path,
1328                                  branch_with_dir_rename, new_path);
1329         } else {
1330                 /*
1331                  * opt->detect_directory_renames has the value
1332                  * MERGE_DIRECTORY_RENAMES_CONFLICT, so mark these as conflicts.
1333                  */
1334                 ci->path_conflict = 1;
1335                 if (pair->status == 'A')
1336                         path_msg(opt, new_path, 0,
1337                                  _("CONFLICT (file location): %s added in %s "
1338                                    "inside a directory that was renamed in %s, "
1339                                    "suggesting it should perhaps be moved to "
1340                                    "%s."),
1341                                  old_path, branch_with_new_path,
1342                                  branch_with_dir_rename, new_path);
1343                 else
1344                         path_msg(opt, new_path, 0,
1345                                  _("CONFLICT (file location): %s renamed to %s "
1346                                    "in %s, inside a directory that was renamed "
1347                                    "in %s, suggesting it should perhaps be "
1348                                    "moved to %s."),
1349                                  pair->one->path, old_path, branch_with_new_path,
1350                                  branch_with_dir_rename, new_path);
1351         }
1352
1353         /*
1354          * Finally, record the new location.
1355          */
1356         pair->two->path = new_path;
1357 }
1358
1359 /*** Function Grouping: functions related to regular rename detection ***/
1360
1361 static int process_renames(struct merge_options *opt,
1362                            struct diff_queue_struct *renames)
1363 {
1364         int clean_merge = 1, i;
1365
1366         for (i = 0; i < renames->nr; ++i) {
1367                 const char *oldpath = NULL, *newpath;
1368                 struct diff_filepair *pair = renames->queue[i];
1369                 struct conflict_info *oldinfo = NULL, *newinfo = NULL;
1370                 struct strmap_entry *old_ent, *new_ent;
1371                 unsigned int old_sidemask;
1372                 int target_index, other_source_index;
1373                 int source_deleted, collision, type_changed;
1374                 const char *rename_branch = NULL, *delete_branch = NULL;
1375
1376                 old_ent = strmap_get_entry(&opt->priv->paths, pair->one->path);
1377                 new_ent = strmap_get_entry(&opt->priv->paths, pair->two->path);
1378                 if (old_ent) {
1379                         oldpath = old_ent->key;
1380                         oldinfo = old_ent->value;
1381                 }
1382                 newpath = pair->two->path;
1383                 if (new_ent) {
1384                         newpath = new_ent->key;
1385                         newinfo = new_ent->value;
1386                 }
1387
1388                 /*
1389                  * If pair->one->path isn't in opt->priv->paths, that means
1390                  * that either directory rename detection removed that
1391                  * path, or a parent directory of oldpath was resolved and
1392                  * we don't even need the rename; in either case, we can
1393                  * skip it.  If oldinfo->merged.clean, then the other side
1394                  * of history had no changes to oldpath and we don't need
1395                  * the rename and can skip it.
1396                  */
1397                 if (!oldinfo || oldinfo->merged.clean)
1398                         continue;
1399
1400                 /*
1401                  * diff_filepairs have copies of pathnames, thus we have to
1402                  * use standard 'strcmp()' (negated) instead of '=='.
1403                  */
1404                 if (i + 1 < renames->nr &&
1405                     !strcmp(oldpath, renames->queue[i+1]->one->path)) {
1406                         /* Handle rename/rename(1to2) or rename/rename(1to1) */
1407                         const char *pathnames[3];
1408                         struct version_info merged;
1409                         struct conflict_info *base, *side1, *side2;
1410                         unsigned was_binary_blob = 0;
1411
1412                         pathnames[0] = oldpath;
1413                         pathnames[1] = newpath;
1414                         pathnames[2] = renames->queue[i+1]->two->path;
1415
1416                         base = strmap_get(&opt->priv->paths, pathnames[0]);
1417                         side1 = strmap_get(&opt->priv->paths, pathnames[1]);
1418                         side2 = strmap_get(&opt->priv->paths, pathnames[2]);
1419
1420                         VERIFY_CI(base);
1421                         VERIFY_CI(side1);
1422                         VERIFY_CI(side2);
1423
1424                         if (!strcmp(pathnames[1], pathnames[2])) {
1425                                 /* Both sides renamed the same way */
1426                                 assert(side1 == side2);
1427                                 memcpy(&side1->stages[0], &base->stages[0],
1428                                        sizeof(merged));
1429                                 side1->filemask |= (1 << MERGE_BASE);
1430                                 /* Mark base as resolved by removal */
1431                                 base->merged.is_null = 1;
1432                                 base->merged.clean = 1;
1433
1434                                 /* We handled both renames, i.e. i+1 handled */
1435                                 i++;
1436                                 /* Move to next rename */
1437                                 continue;
1438                         }
1439
1440                         /* This is a rename/rename(1to2) */
1441                         clean_merge = handle_content_merge(opt,
1442                                                            pair->one->path,
1443                                                            &base->stages[0],
1444                                                            &side1->stages[1],
1445                                                            &side2->stages[2],
1446                                                            pathnames,
1447                                                            1 + 2 * opt->priv->call_depth,
1448                                                            &merged);
1449                         if (!clean_merge &&
1450                             merged.mode == side1->stages[1].mode &&
1451                             oideq(&merged.oid, &side1->stages[1].oid))
1452                                 was_binary_blob = 1;
1453                         memcpy(&side1->stages[1], &merged, sizeof(merged));
1454                         if (was_binary_blob) {
1455                                 /*
1456                                  * Getting here means we were attempting to
1457                                  * merge a binary blob.
1458                                  *
1459                                  * Since we can't merge binaries,
1460                                  * handle_content_merge() just takes one
1461                                  * side.  But we don't want to copy the
1462                                  * contents of one side to both paths.  We
1463                                  * used the contents of side1 above for
1464                                  * side1->stages, let's use the contents of
1465                                  * side2 for side2->stages below.
1466                                  */
1467                                 oidcpy(&merged.oid, &side2->stages[2].oid);
1468                                 merged.mode = side2->stages[2].mode;
1469                         }
1470                         memcpy(&side2->stages[2], &merged, sizeof(merged));
1471
1472                         side1->path_conflict = 1;
1473                         side2->path_conflict = 1;
1474                         /*
1475                          * TODO: For renames we normally remove the path at the
1476                          * old name.  It would thus seem consistent to do the
1477                          * same for rename/rename(1to2) cases, but we haven't
1478                          * done so traditionally and a number of the regression
1479                          * tests now encode an expectation that the file is
1480                          * left there at stage 1.  If we ever decide to change
1481                          * this, add the following two lines here:
1482                          *    base->merged.is_null = 1;
1483                          *    base->merged.clean = 1;
1484                          * and remove the setting of base->path_conflict to 1.
1485                          */
1486                         base->path_conflict = 1;
1487                         path_msg(opt, oldpath, 0,
1488                                  _("CONFLICT (rename/rename): %s renamed to "
1489                                    "%s in %s and to %s in %s."),
1490                                  pathnames[0],
1491                                  pathnames[1], opt->branch1,
1492                                  pathnames[2], opt->branch2);
1493
1494                         i++; /* We handled both renames, i.e. i+1 handled */
1495                         continue;
1496                 }
1497
1498                 VERIFY_CI(oldinfo);
1499                 VERIFY_CI(newinfo);
1500                 target_index = pair->score; /* from collect_renames() */
1501                 assert(target_index == 1 || target_index == 2);
1502                 other_source_index = 3 - target_index;
1503                 old_sidemask = (1 << other_source_index); /* 2 or 4 */
1504                 source_deleted = (oldinfo->filemask == 1);
1505                 collision = ((newinfo->filemask & old_sidemask) != 0);
1506                 type_changed = !source_deleted &&
1507                         (S_ISREG(oldinfo->stages[other_source_index].mode) !=
1508                          S_ISREG(newinfo->stages[target_index].mode));
1509                 if (type_changed && collision) {
1510                         /*
1511                          * special handling so later blocks can handle this...
1512                          *
1513                          * if type_changed && collision are both true, then this
1514                          * was really a double rename, but one side wasn't
1515                          * detected due to lack of break detection.  I.e.
1516                          * something like
1517                          *    orig: has normal file 'foo'
1518                          *    side1: renames 'foo' to 'bar', adds 'foo' symlink
1519                          *    side2: renames 'foo' to 'bar'
1520                          * In this case, the foo->bar rename on side1 won't be
1521                          * detected because the new symlink named 'foo' is
1522                          * there and we don't do break detection.  But we detect
1523                          * this here because we don't want to merge the content
1524                          * of the foo symlink with the foo->bar file, so we
1525                          * have some logic to handle this special case.  The
1526                          * easiest way to do that is make 'bar' on side1 not
1527                          * be considered a colliding file but the other part
1528                          * of a normal rename.  If the file is very different,
1529                          * well we're going to get content merge conflicts
1530                          * anyway so it doesn't hurt.  And if the colliding
1531                          * file also has a different type, that'll be handled
1532                          * by the content merge logic in process_entry() too.
1533                          *
1534                          * See also t6430, 'rename vs. rename/symlink'
1535                          */
1536                         collision = 0;
1537                 }
1538                 if (source_deleted) {
1539                         if (target_index == 1) {
1540                                 rename_branch = opt->branch1;
1541                                 delete_branch = opt->branch2;
1542                         } else {
1543                                 rename_branch = opt->branch2;
1544                                 delete_branch = opt->branch1;
1545                         }
1546                 }
1547
1548                 assert(source_deleted || oldinfo->filemask & old_sidemask);
1549
1550                 /* Need to check for special types of rename conflicts... */
1551                 if (collision && !source_deleted) {
1552                         /* collision: rename/add or rename/rename(2to1) */
1553                         const char *pathnames[3];
1554                         struct version_info merged;
1555
1556                         struct conflict_info *base, *side1, *side2;
1557                         unsigned clean;
1558
1559                         pathnames[0] = oldpath;
1560                         pathnames[other_source_index] = oldpath;
1561                         pathnames[target_index] = newpath;
1562
1563                         base = strmap_get(&opt->priv->paths, pathnames[0]);
1564                         side1 = strmap_get(&opt->priv->paths, pathnames[1]);
1565                         side2 = strmap_get(&opt->priv->paths, pathnames[2]);
1566
1567                         VERIFY_CI(base);
1568                         VERIFY_CI(side1);
1569                         VERIFY_CI(side2);
1570
1571                         clean = handle_content_merge(opt, pair->one->path,
1572                                                      &base->stages[0],
1573                                                      &side1->stages[1],
1574                                                      &side2->stages[2],
1575                                                      pathnames,
1576                                                      1 + 2 * opt->priv->call_depth,
1577                                                      &merged);
1578
1579                         memcpy(&newinfo->stages[target_index], &merged,
1580                                sizeof(merged));
1581                         if (!clean) {
1582                                 path_msg(opt, newpath, 0,
1583                                          _("CONFLICT (rename involved in "
1584                                            "collision): rename of %s -> %s has "
1585                                            "content conflicts AND collides "
1586                                            "with another path; this may result "
1587                                            "in nested conflict markers."),
1588                                          oldpath, newpath);
1589                         }
1590                 } else if (collision && source_deleted) {
1591                         /*
1592                          * rename/add/delete or rename/rename(2to1)/delete:
1593                          * since oldpath was deleted on the side that didn't
1594                          * do the rename, there's not much of a content merge
1595                          * we can do for the rename.  oldinfo->merged.is_null
1596                          * was already set, so we just leave things as-is so
1597                          * they look like an add/add conflict.
1598                          */
1599
1600                         newinfo->path_conflict = 1;
1601                         path_msg(opt, newpath, 0,
1602                                  _("CONFLICT (rename/delete): %s renamed "
1603                                    "to %s in %s, but deleted in %s."),
1604                                  oldpath, newpath, rename_branch, delete_branch);
1605                 } else {
1606                         /*
1607                          * a few different cases...start by copying the
1608                          * existing stage(s) from oldinfo over the newinfo
1609                          * and update the pathname(s).
1610                          */
1611                         memcpy(&newinfo->stages[0], &oldinfo->stages[0],
1612                                sizeof(newinfo->stages[0]));
1613                         newinfo->filemask |= (1 << MERGE_BASE);
1614                         newinfo->pathnames[0] = oldpath;
1615                         if (type_changed) {
1616                                 /* rename vs. typechange */
1617                                 /* Mark the original as resolved by removal */
1618                                 memcpy(&oldinfo->stages[0].oid, &null_oid,
1619                                        sizeof(oldinfo->stages[0].oid));
1620                                 oldinfo->stages[0].mode = 0;
1621                                 oldinfo->filemask &= 0x06;
1622                         } else if (source_deleted) {
1623                                 /* rename/delete */
1624                                 newinfo->path_conflict = 1;
1625                                 path_msg(opt, newpath, 0,
1626                                          _("CONFLICT (rename/delete): %s renamed"
1627                                            " to %s in %s, but deleted in %s."),
1628                                          oldpath, newpath,
1629                                          rename_branch, delete_branch);
1630                         } else {
1631                                 /* normal rename */
1632                                 memcpy(&newinfo->stages[other_source_index],
1633                                        &oldinfo->stages[other_source_index],
1634                                        sizeof(newinfo->stages[0]));
1635                                 newinfo->filemask |= (1 << other_source_index);
1636                                 newinfo->pathnames[other_source_index] = oldpath;
1637                         }
1638                 }
1639
1640                 if (!type_changed) {
1641                         /* Mark the original as resolved by removal */
1642                         oldinfo->merged.is_null = 1;
1643                         oldinfo->merged.clean = 1;
1644                 }
1645
1646         }
1647
1648         return clean_merge;
1649 }
1650
1651 static int compare_pairs(const void *a_, const void *b_)
1652 {
1653         const struct diff_filepair *a = *((const struct diff_filepair **)a_);
1654         const struct diff_filepair *b = *((const struct diff_filepair **)b_);
1655
1656         return strcmp(a->one->path, b->one->path);
1657 }
1658
1659 /* Call diffcore_rename() to compute which files have changed on given side */
1660 static void detect_regular_renames(struct merge_options *opt,
1661                                    struct tree *merge_base,
1662                                    struct tree *side,
1663                                    unsigned side_index)
1664 {
1665         struct diff_options diff_opts;
1666         struct rename_info *renames = &opt->priv->renames;
1667
1668         repo_diff_setup(opt->repo, &diff_opts);
1669         diff_opts.flags.recursive = 1;
1670         diff_opts.flags.rename_empty = 0;
1671         diff_opts.detect_rename = DIFF_DETECT_RENAME;
1672         diff_opts.rename_limit = opt->rename_limit;
1673         if (opt->rename_limit <= 0)
1674                 diff_opts.rename_limit = 1000;
1675         diff_opts.rename_score = opt->rename_score;
1676         diff_opts.show_rename_progress = opt->show_rename_progress;
1677         diff_opts.output_format = DIFF_FORMAT_NO_OUTPUT;
1678         diff_setup_done(&diff_opts);
1679         diff_tree_oid(&merge_base->object.oid, &side->object.oid, "",
1680                       &diff_opts);
1681         diffcore_std(&diff_opts);
1682
1683         if (diff_opts.needed_rename_limit > renames->needed_limit)
1684                 renames->needed_limit = diff_opts.needed_rename_limit;
1685
1686         renames->pairs[side_index] = diff_queued_diff;
1687
1688         diff_opts.output_format = DIFF_FORMAT_NO_OUTPUT;
1689         diff_queued_diff.nr = 0;
1690         diff_queued_diff.queue = NULL;
1691         diff_flush(&diff_opts);
1692 }
1693
1694 /*
1695  * Get information of all renames which occurred in 'side_pairs', discarding
1696  * non-renames.
1697  */
1698 static int collect_renames(struct merge_options *opt,
1699                            struct diff_queue_struct *result,
1700                            unsigned side_index,
1701                            struct strmap *dir_renames_for_side,
1702                            struct strmap *rename_exclusions)
1703 {
1704         int i, clean = 1;
1705         struct strmap collisions;
1706         struct diff_queue_struct *side_pairs;
1707         struct hashmap_iter iter;
1708         struct strmap_entry *entry;
1709         struct rename_info *renames = &opt->priv->renames;
1710
1711         side_pairs = &renames->pairs[side_index];
1712         compute_collisions(&collisions, dir_renames_for_side, side_pairs);
1713
1714         for (i = 0; i < side_pairs->nr; ++i) {
1715                 struct diff_filepair *p = side_pairs->queue[i];
1716                 char *new_path; /* non-NULL only with directory renames */
1717
1718                 if (p->status != 'A' && p->status != 'R') {
1719                         diff_free_filepair(p);
1720                         continue;
1721                 }
1722
1723                 new_path = check_for_directory_rename(opt, p->two->path,
1724                                                       side_index,
1725                                                       dir_renames_for_side,
1726                                                       rename_exclusions,
1727                                                       &collisions,
1728                                                       &clean);
1729
1730                 if (p->status != 'R' && !new_path) {
1731                         diff_free_filepair(p);
1732                         continue;
1733                 }
1734
1735                 if (new_path)
1736                         apply_directory_rename_modifications(opt, p, new_path);
1737
1738                 /*
1739                  * p->score comes back from diffcore_rename_extended() with
1740                  * the similarity of the renamed file.  The similarity is
1741                  * was used to determine that the two files were related
1742                  * and are a rename, which we have already used, but beyond
1743                  * that we have no use for the similarity.  So p->score is
1744                  * now irrelevant.  However, process_renames() will need to
1745                  * know which side of the merge this rename was associated
1746                  * with, so overwrite p->score with that value.
1747                  */
1748                 p->score = side_index;
1749                 result->queue[result->nr++] = p;
1750         }
1751
1752         /* Free each value in the collisions map */
1753         strmap_for_each_entry(&collisions, &iter, entry) {
1754                 struct collision_info *info = entry->value;
1755                 string_list_clear(&info->source_files, 0);
1756         }
1757         /*
1758          * In compute_collisions(), we set collisions.strdup_strings to 0
1759          * so that we wouldn't have to make another copy of the new_path
1760          * allocated by apply_dir_rename().  But now that we've used them
1761          * and have no other references to these strings, it is time to
1762          * deallocate them.
1763          */
1764         free_strmap_strings(&collisions);
1765         strmap_clear(&collisions, 1);
1766         return clean;
1767 }
1768
1769 static int detect_and_process_renames(struct merge_options *opt,
1770                                       struct tree *merge_base,
1771                                       struct tree *side1,
1772                                       struct tree *side2)
1773 {
1774         struct diff_queue_struct combined;
1775         struct rename_info *renames = &opt->priv->renames;
1776         int need_dir_renames, s, clean = 1;
1777
1778         memset(&combined, 0, sizeof(combined));
1779
1780         detect_regular_renames(opt, merge_base, side1, MERGE_SIDE1);
1781         detect_regular_renames(opt, merge_base, side2, MERGE_SIDE2);
1782
1783         need_dir_renames =
1784           !opt->priv->call_depth &&
1785           (opt->detect_directory_renames == MERGE_DIRECTORY_RENAMES_TRUE ||
1786            opt->detect_directory_renames == MERGE_DIRECTORY_RENAMES_CONFLICT);
1787
1788         if (need_dir_renames) {
1789                 get_provisional_directory_renames(opt, MERGE_SIDE1, &clean);
1790                 get_provisional_directory_renames(opt, MERGE_SIDE2, &clean);
1791                 handle_directory_level_conflicts(opt);
1792         }
1793
1794         ALLOC_GROW(combined.queue,
1795                    renames->pairs[1].nr + renames->pairs[2].nr,
1796                    combined.alloc);
1797         clean &= collect_renames(opt, &combined, MERGE_SIDE1,
1798                                  &renames->dir_renames[2],
1799                                  &renames->dir_renames[1]);
1800         clean &= collect_renames(opt, &combined, MERGE_SIDE2,
1801                                  &renames->dir_renames[1],
1802                                  &renames->dir_renames[2]);
1803         QSORT(combined.queue, combined.nr, compare_pairs);
1804
1805         clean &= process_renames(opt, &combined);
1806
1807         /* Free memory for renames->pairs[] and combined */
1808         for (s = MERGE_SIDE1; s <= MERGE_SIDE2; s++) {
1809                 free(renames->pairs[s].queue);
1810                 DIFF_QUEUE_CLEAR(&renames->pairs[s]);
1811         }
1812         if (combined.nr) {
1813                 int i;
1814                 for (i = 0; i < combined.nr; i++)
1815                         diff_free_filepair(combined.queue[i]);
1816                 free(combined.queue);
1817         }
1818
1819         return clean;
1820 }
1821
1822 /*** Function Grouping: functions related to process_entries() ***/
1823
1824 static int string_list_df_name_compare(const char *one, const char *two)
1825 {
1826         int onelen = strlen(one);
1827         int twolen = strlen(two);
1828         /*
1829          * Here we only care that entries for D/F conflicts are
1830          * adjacent, in particular with the file of the D/F conflict
1831          * appearing before files below the corresponding directory.
1832          * The order of the rest of the list is irrelevant for us.
1833          *
1834          * To achieve this, we sort with df_name_compare and provide
1835          * the mode S_IFDIR so that D/F conflicts will sort correctly.
1836          * We use the mode S_IFDIR for everything else for simplicity,
1837          * since in other cases any changes in their order due to
1838          * sorting cause no problems for us.
1839          */
1840         int cmp = df_name_compare(one, onelen, S_IFDIR,
1841                                   two, twolen, S_IFDIR);
1842         /*
1843          * Now that 'foo' and 'foo/bar' compare equal, we have to make sure
1844          * that 'foo' comes before 'foo/bar'.
1845          */
1846         if (cmp)
1847                 return cmp;
1848         return onelen - twolen;
1849 }
1850
1851 struct directory_versions {
1852         /*
1853          * versions: list of (basename -> version_info)
1854          *
1855          * The basenames are in reverse lexicographic order of full pathnames,
1856          * as processed in process_entries().  This puts all entries within
1857          * a directory together, and covers the directory itself after
1858          * everything within it, allowing us to write subtrees before needing
1859          * to record information for the tree itself.
1860          */
1861         struct string_list versions;
1862
1863         /*
1864          * offsets: list of (full relative path directories -> integer offsets)
1865          *
1866          * Since versions contains basenames from files in multiple different
1867          * directories, we need to know which entries in versions correspond
1868          * to which directories.  Values of e.g.
1869          *     ""             0
1870          *     src            2
1871          *     src/moduleA    5
1872          * Would mean that entries 0-1 of versions are files in the toplevel
1873          * directory, entries 2-4 are files under src/, and the remaining
1874          * entries starting at index 5 are files under src/moduleA/.
1875          */
1876         struct string_list offsets;
1877
1878         /*
1879          * last_directory: directory that previously processed file found in
1880          *
1881          * last_directory starts NULL, but records the directory in which the
1882          * previous file was found within.  As soon as
1883          *    directory(current_file) != last_directory
1884          * then we need to start updating accounting in versions & offsets.
1885          * Note that last_directory is always the last path in "offsets" (or
1886          * NULL if "offsets" is empty) so this exists just for quick access.
1887          */
1888         const char *last_directory;
1889
1890         /* last_directory_len: cached computation of strlen(last_directory) */
1891         unsigned last_directory_len;
1892 };
1893
1894 static int tree_entry_order(const void *a_, const void *b_)
1895 {
1896         const struct string_list_item *a = a_;
1897         const struct string_list_item *b = b_;
1898
1899         const struct merged_info *ami = a->util;
1900         const struct merged_info *bmi = b->util;
1901         return base_name_compare(a->string, strlen(a->string), ami->result.mode,
1902                                  b->string, strlen(b->string), bmi->result.mode);
1903 }
1904
1905 static void write_tree(struct object_id *result_oid,
1906                        struct string_list *versions,
1907                        unsigned int offset,
1908                        size_t hash_size)
1909 {
1910         size_t maxlen = 0, extra;
1911         unsigned int nr = versions->nr - offset;
1912         struct strbuf buf = STRBUF_INIT;
1913         struct string_list relevant_entries = STRING_LIST_INIT_NODUP;
1914         int i;
1915
1916         /*
1917          * We want to sort the last (versions->nr-offset) entries in versions.
1918          * Do so by abusing the string_list API a bit: make another string_list
1919          * that contains just those entries and then sort them.
1920          *
1921          * We won't use relevant_entries again and will let it just pop off the
1922          * stack, so there won't be allocation worries or anything.
1923          */
1924         relevant_entries.items = versions->items + offset;
1925         relevant_entries.nr = versions->nr - offset;
1926         QSORT(relevant_entries.items, relevant_entries.nr, tree_entry_order);
1927
1928         /* Pre-allocate some space in buf */
1929         extra = hash_size + 8; /* 8: 6 for mode, 1 for space, 1 for NUL char */
1930         for (i = 0; i < nr; i++) {
1931                 maxlen += strlen(versions->items[offset+i].string) + extra;
1932         }
1933         strbuf_grow(&buf, maxlen);
1934
1935         /* Write each entry out to buf */
1936         for (i = 0; i < nr; i++) {
1937                 struct merged_info *mi = versions->items[offset+i].util;
1938                 struct version_info *ri = &mi->result;
1939                 strbuf_addf(&buf, "%o %s%c",
1940                             ri->mode,
1941                             versions->items[offset+i].string, '\0');
1942                 strbuf_add(&buf, ri->oid.hash, hash_size);
1943         }
1944
1945         /* Write this object file out, and record in result_oid */
1946         write_object_file(buf.buf, buf.len, tree_type, result_oid);
1947         strbuf_release(&buf);
1948 }
1949
1950 static void record_entry_for_tree(struct directory_versions *dir_metadata,
1951                                   const char *path,
1952                                   struct merged_info *mi)
1953 {
1954         const char *basename;
1955
1956         if (mi->is_null)
1957                 /* nothing to record */
1958                 return;
1959
1960         basename = path + mi->basename_offset;
1961         assert(strchr(basename, '/') == NULL);
1962         string_list_append(&dir_metadata->versions,
1963                            basename)->util = &mi->result;
1964 }
1965
1966 static void write_completed_directory(struct merge_options *opt,
1967                                       const char *new_directory_name,
1968                                       struct directory_versions *info)
1969 {
1970         const char *prev_dir;
1971         struct merged_info *dir_info = NULL;
1972         unsigned int offset;
1973
1974         /*
1975          * Some explanation of info->versions and info->offsets...
1976          *
1977          * process_entries() iterates over all relevant files AND
1978          * directories in reverse lexicographic order, and calls this
1979          * function.  Thus, an example of the paths that process_entries()
1980          * could operate on (along with the directories for those paths
1981          * being shown) is:
1982          *
1983          *     xtract.c             ""
1984          *     tokens.txt           ""
1985          *     src/moduleB/umm.c    src/moduleB
1986          *     src/moduleB/stuff.h  src/moduleB
1987          *     src/moduleB/baz.c    src/moduleB
1988          *     src/moduleB          src
1989          *     src/moduleA/foo.c    src/moduleA
1990          *     src/moduleA/bar.c    src/moduleA
1991          *     src/moduleA          src
1992          *     src                  ""
1993          *     Makefile             ""
1994          *
1995          * info->versions:
1996          *
1997          *     always contains the unprocessed entries and their
1998          *     version_info information.  For example, after the first five
1999          *     entries above, info->versions would be:
2000          *
2001          *         xtract.c     <xtract.c's version_info>
2002          *         token.txt    <token.txt's version_info>
2003          *         umm.c        <src/moduleB/umm.c's version_info>
2004          *         stuff.h      <src/moduleB/stuff.h's version_info>
2005          *         baz.c        <src/moduleB/baz.c's version_info>
2006          *
2007          *     Once a subdirectory is completed we remove the entries in
2008          *     that subdirectory from info->versions, writing it as a tree
2009          *     (write_tree()).  Thus, as soon as we get to src/moduleB,
2010          *     info->versions would be updated to
2011          *
2012          *         xtract.c     <xtract.c's version_info>
2013          *         token.txt    <token.txt's version_info>
2014          *         moduleB      <src/moduleB's version_info>
2015          *
2016          * info->offsets:
2017          *
2018          *     helps us track which entries in info->versions correspond to
2019          *     which directories.  When we are N directories deep (e.g. 4
2020          *     for src/modA/submod/subdir/), we have up to N+1 unprocessed
2021          *     directories (+1 because of toplevel dir).  Corresponding to
2022          *     the info->versions example above, after processing five entries
2023          *     info->offsets will be:
2024          *
2025          *         ""           0
2026          *         src/moduleB  2
2027          *
2028          *     which is used to know that xtract.c & token.txt are from the
2029          *     toplevel dirctory, while umm.c & stuff.h & baz.c are from the
2030          *     src/moduleB directory.  Again, following the example above,
2031          *     once we need to process src/moduleB, then info->offsets is
2032          *     updated to
2033          *
2034          *         ""           0
2035          *         src          2
2036          *
2037          *     which says that moduleB (and only moduleB so far) is in the
2038          *     src directory.
2039          *
2040          *     One unique thing to note about info->offsets here is that
2041          *     "src" was not added to info->offsets until there was a path
2042          *     (a file OR directory) immediately below src/ that got
2043          *     processed.
2044          *
2045          * Since process_entry() just appends new entries to info->versions,
2046          * write_completed_directory() only needs to do work if the next path
2047          * is in a directory that is different than the last directory found
2048          * in info->offsets.
2049          */
2050
2051         /*
2052          * If we are working with the same directory as the last entry, there
2053          * is no work to do.  (See comments above the directory_name member of
2054          * struct merged_info for why we can use pointer comparison instead of
2055          * strcmp here.)
2056          */
2057         if (new_directory_name == info->last_directory)
2058                 return;
2059
2060         /*
2061          * If we are just starting (last_directory is NULL), or last_directory
2062          * is a prefix of the current directory, then we can just update
2063          * info->offsets to record the offset where we started this directory
2064          * and update last_directory to have quick access to it.
2065          */
2066         if (info->last_directory == NULL ||
2067             !strncmp(new_directory_name, info->last_directory,
2068                      info->last_directory_len)) {
2069                 uintptr_t offset = info->versions.nr;
2070
2071                 info->last_directory = new_directory_name;
2072                 info->last_directory_len = strlen(info->last_directory);
2073                 /*
2074                  * Record the offset into info->versions where we will
2075                  * start recording basenames of paths found within
2076                  * new_directory_name.
2077                  */
2078                 string_list_append(&info->offsets,
2079                                    info->last_directory)->util = (void*)offset;
2080                 return;
2081         }
2082
2083         /*
2084          * The next entry that will be processed will be within
2085          * new_directory_name.  Since at this point we know that
2086          * new_directory_name is within a different directory than
2087          * info->last_directory, we have all entries for info->last_directory
2088          * in info->versions and we need to create a tree object for them.
2089          */
2090         dir_info = strmap_get(&opt->priv->paths, info->last_directory);
2091         assert(dir_info);
2092         offset = (uintptr_t)info->offsets.items[info->offsets.nr-1].util;
2093         if (offset == info->versions.nr) {
2094                 /*
2095                  * Actually, we don't need to create a tree object in this
2096                  * case.  Whenever all files within a directory disappear
2097                  * during the merge (e.g. unmodified on one side and
2098                  * deleted on the other, or files were renamed elsewhere),
2099                  * then we get here and the directory itself needs to be
2100                  * omitted from its parent tree as well.
2101                  */
2102                 dir_info->is_null = 1;
2103         } else {
2104                 /*
2105                  * Write out the tree to the git object directory, and also
2106                  * record the mode and oid in dir_info->result.
2107                  */
2108                 dir_info->is_null = 0;
2109                 dir_info->result.mode = S_IFDIR;
2110                 write_tree(&dir_info->result.oid, &info->versions, offset,
2111                            opt->repo->hash_algo->rawsz);
2112         }
2113
2114         /*
2115          * We've now used several entries from info->versions and one entry
2116          * from info->offsets, so we get rid of those values.
2117          */
2118         info->offsets.nr--;
2119         info->versions.nr = offset;
2120
2121         /*
2122          * Now we've taken care of the completed directory, but we need to
2123          * prepare things since future entries will be in
2124          * new_directory_name.  (In particular, process_entry() will be
2125          * appending new entries to info->versions.)  So, we need to make
2126          * sure new_directory_name is the last entry in info->offsets.
2127          */
2128         prev_dir = info->offsets.nr == 0 ? NULL :
2129                    info->offsets.items[info->offsets.nr-1].string;
2130         if (new_directory_name != prev_dir) {
2131                 uintptr_t c = info->versions.nr;
2132                 string_list_append(&info->offsets,
2133                                    new_directory_name)->util = (void*)c;
2134         }
2135
2136         /* And, of course, we need to update last_directory to match. */
2137         info->last_directory = new_directory_name;
2138         info->last_directory_len = strlen(info->last_directory);
2139 }
2140
2141 /* Per entry merge function */
2142 static void process_entry(struct merge_options *opt,
2143                           const char *path,
2144                           struct conflict_info *ci,
2145                           struct directory_versions *dir_metadata)
2146 {
2147         VERIFY_CI(ci);
2148         assert(ci->filemask >= 0 && ci->filemask <= 7);
2149         /* ci->match_mask == 7 was handled in collect_merge_info_callback() */
2150         assert(ci->match_mask == 0 || ci->match_mask == 3 ||
2151                ci->match_mask == 5 || ci->match_mask == 6);
2152
2153         if (ci->dirmask) {
2154                 record_entry_for_tree(dir_metadata, path, &ci->merged);
2155                 if (ci->filemask == 0)
2156                         /* nothing else to handle */
2157                         return;
2158                 assert(ci->df_conflict);
2159         }
2160
2161         if (ci->df_conflict) {
2162                 die("Not yet implemented.");
2163         }
2164
2165         /*
2166          * NOTE: Below there is a long switch-like if-elseif-elseif... block
2167          *       which the code goes through even for the df_conflict cases
2168          *       above.  Well, it will once we don't die-not-implemented above.
2169          */
2170         if (ci->match_mask) {
2171                 ci->merged.clean = 1;
2172                 if (ci->match_mask == 6) {
2173                         /* stages[1] == stages[2] */
2174                         ci->merged.result.mode = ci->stages[1].mode;
2175                         oidcpy(&ci->merged.result.oid, &ci->stages[1].oid);
2176                 } else {
2177                         /* determine the mask of the side that didn't match */
2178                         unsigned int othermask = 7 & ~ci->match_mask;
2179                         int side = (othermask == 4) ? 2 : 1;
2180
2181                         ci->merged.result.mode = ci->stages[side].mode;
2182                         ci->merged.is_null = !ci->merged.result.mode;
2183                         oidcpy(&ci->merged.result.oid, &ci->stages[side].oid);
2184
2185                         assert(othermask == 2 || othermask == 4);
2186                         assert(ci->merged.is_null ==
2187                                (ci->filemask == ci->match_mask));
2188                 }
2189         } else if (ci->filemask >= 6 &&
2190                    (S_IFMT & ci->stages[1].mode) !=
2191                    (S_IFMT & ci->stages[2].mode)) {
2192                 /*
2193                  * Two different items from (file/submodule/symlink)
2194                  */
2195                 die("Not yet implemented.");
2196         } else if (ci->filemask >= 6) {
2197                 /*
2198                  * TODO: Needs a two-way or three-way content merge, but we're
2199                  * just being lazy and copying the version from HEAD and
2200                  * leaving it as conflicted.
2201                  */
2202                 ci->merged.clean = 0;
2203                 ci->merged.result.mode = ci->stages[1].mode;
2204                 oidcpy(&ci->merged.result.oid, &ci->stages[1].oid);
2205                 /* When we fix above, we'll call handle_content_merge() */
2206                 (void)handle_content_merge;
2207         } else if (ci->filemask == 3 || ci->filemask == 5) {
2208                 /* Modify/delete */
2209                 const char *modify_branch, *delete_branch;
2210                 int side = (ci->filemask == 5) ? 2 : 1;
2211                 int index = opt->priv->call_depth ? 0 : side;
2212
2213                 ci->merged.result.mode = ci->stages[index].mode;
2214                 oidcpy(&ci->merged.result.oid, &ci->stages[index].oid);
2215                 ci->merged.clean = 0;
2216
2217                 modify_branch = (side == 1) ? opt->branch1 : opt->branch2;
2218                 delete_branch = (side == 1) ? opt->branch2 : opt->branch1;
2219
2220                 if (ci->path_conflict &&
2221                     oideq(&ci->stages[0].oid, &ci->stages[side].oid)) {
2222                         /*
2223                          * This came from a rename/delete; no action to take,
2224                          * but avoid printing "modify/delete" conflict notice
2225                          * since the contents were not modified.
2226                          */
2227                 } else {
2228                         path_msg(opt, path, 0,
2229                                  _("CONFLICT (modify/delete): %s deleted in %s "
2230                                    "and modified in %s.  Version %s of %s left "
2231                                    "in tree."),
2232                                  path, delete_branch, modify_branch,
2233                                  modify_branch, path);
2234                 }
2235         } else if (ci->filemask == 2 || ci->filemask == 4) {
2236                 /* Added on one side */
2237                 int side = (ci->filemask == 4) ? 2 : 1;
2238                 ci->merged.result.mode = ci->stages[side].mode;
2239                 oidcpy(&ci->merged.result.oid, &ci->stages[side].oid);
2240                 ci->merged.clean = !ci->df_conflict && !ci->path_conflict;
2241         } else if (ci->filemask == 1) {
2242                 /* Deleted on both sides */
2243                 ci->merged.is_null = 1;
2244                 ci->merged.result.mode = 0;
2245                 oidcpy(&ci->merged.result.oid, &null_oid);
2246                 ci->merged.clean = !ci->path_conflict;
2247         }
2248
2249         /*
2250          * If still conflicted, record it separately.  This allows us to later
2251          * iterate over just conflicted entries when updating the index instead
2252          * of iterating over all entries.
2253          */
2254         if (!ci->merged.clean)
2255                 strmap_put(&opt->priv->conflicted, path, ci);
2256         record_entry_for_tree(dir_metadata, path, &ci->merged);
2257 }
2258
2259 static void process_entries(struct merge_options *opt,
2260                             struct object_id *result_oid)
2261 {
2262         struct hashmap_iter iter;
2263         struct strmap_entry *e;
2264         struct string_list plist = STRING_LIST_INIT_NODUP;
2265         struct string_list_item *entry;
2266         struct directory_versions dir_metadata = { STRING_LIST_INIT_NODUP,
2267                                                    STRING_LIST_INIT_NODUP,
2268                                                    NULL, 0 };
2269
2270         if (strmap_empty(&opt->priv->paths)) {
2271                 oidcpy(result_oid, opt->repo->hash_algo->empty_tree);
2272                 return;
2273         }
2274
2275         /* Hack to pre-allocate plist to the desired size */
2276         ALLOC_GROW(plist.items, strmap_get_size(&opt->priv->paths), plist.alloc);
2277
2278         /* Put every entry from paths into plist, then sort */
2279         strmap_for_each_entry(&opt->priv->paths, &iter, e) {
2280                 string_list_append(&plist, e->key)->util = e->value;
2281         }
2282         plist.cmp = string_list_df_name_compare;
2283         string_list_sort(&plist);
2284
2285         /*
2286          * Iterate over the items in reverse order, so we can handle paths
2287          * below a directory before needing to handle the directory itself.
2288          *
2289          * This allows us to write subtrees before we need to write trees,
2290          * and it also enables sane handling of directory/file conflicts
2291          * (because it allows us to know whether the directory is still in
2292          * the way when it is time to process the file at the same path).
2293          */
2294         for (entry = &plist.items[plist.nr-1]; entry >= plist.items; --entry) {
2295                 char *path = entry->string;
2296                 /*
2297                  * NOTE: mi may actually be a pointer to a conflict_info, but
2298                  * we have to check mi->clean first to see if it's safe to
2299                  * reassign to such a pointer type.
2300                  */
2301                 struct merged_info *mi = entry->util;
2302
2303                 write_completed_directory(opt, mi->directory_name,
2304                                           &dir_metadata);
2305                 if (mi->clean)
2306                         record_entry_for_tree(&dir_metadata, path, mi);
2307                 else {
2308                         struct conflict_info *ci = (struct conflict_info *)mi;
2309                         process_entry(opt, path, ci, &dir_metadata);
2310                 }
2311         }
2312
2313         if (dir_metadata.offsets.nr != 1 ||
2314             (uintptr_t)dir_metadata.offsets.items[0].util != 0) {
2315                 printf("dir_metadata.offsets.nr = %d (should be 1)\n",
2316                        dir_metadata.offsets.nr);
2317                 printf("dir_metadata.offsets.items[0].util = %u (should be 0)\n",
2318                        (unsigned)(uintptr_t)dir_metadata.offsets.items[0].util);
2319                 fflush(stdout);
2320                 BUG("dir_metadata accounting completely off; shouldn't happen");
2321         }
2322         write_tree(result_oid, &dir_metadata.versions, 0,
2323                    opt->repo->hash_algo->rawsz);
2324         string_list_clear(&plist, 0);
2325         string_list_clear(&dir_metadata.versions, 0);
2326         string_list_clear(&dir_metadata.offsets, 0);
2327 }
2328
2329 /*** Function Grouping: functions related to merge_switch_to_result() ***/
2330
2331 static int checkout(struct merge_options *opt,
2332                     struct tree *prev,
2333                     struct tree *next)
2334 {
2335         /* Switch the index/working copy from old to new */
2336         int ret;
2337         struct tree_desc trees[2];
2338         struct unpack_trees_options unpack_opts;
2339
2340         memset(&unpack_opts, 0, sizeof(unpack_opts));
2341         unpack_opts.head_idx = -1;
2342         unpack_opts.src_index = opt->repo->index;
2343         unpack_opts.dst_index = opt->repo->index;
2344
2345         setup_unpack_trees_porcelain(&unpack_opts, "merge");
2346
2347         /*
2348          * NOTE: if this were just "git checkout" code, we would probably
2349          * read or refresh the cache and check for a conflicted index, but
2350          * builtin/merge.c or sequencer.c really needs to read the index
2351          * and check for conflicted entries before starting merging for a
2352          * good user experience (no sense waiting for merges/rebases before
2353          * erroring out), so there's no reason to duplicate that work here.
2354          */
2355
2356         /* 2-way merge to the new branch */
2357         unpack_opts.update = 1;
2358         unpack_opts.merge = 1;
2359         unpack_opts.quiet = 0; /* FIXME: sequencer might want quiet? */
2360         unpack_opts.verbose_update = (opt->verbosity > 2);
2361         unpack_opts.fn = twoway_merge;
2362         if (1/* FIXME: opts->overwrite_ignore*/) {
2363                 unpack_opts.dir = xcalloc(1, sizeof(*unpack_opts.dir));
2364                 unpack_opts.dir->flags |= DIR_SHOW_IGNORED;
2365                 setup_standard_excludes(unpack_opts.dir);
2366         }
2367         parse_tree(prev);
2368         init_tree_desc(&trees[0], prev->buffer, prev->size);
2369         parse_tree(next);
2370         init_tree_desc(&trees[1], next->buffer, next->size);
2371
2372         ret = unpack_trees(2, trees, &unpack_opts);
2373         clear_unpack_trees_porcelain(&unpack_opts);
2374         dir_clear(unpack_opts.dir);
2375         FREE_AND_NULL(unpack_opts.dir);
2376         return ret;
2377 }
2378
2379 static int record_conflicted_index_entries(struct merge_options *opt,
2380                                            struct index_state *index,
2381                                            struct strmap *paths,
2382                                            struct strmap *conflicted)
2383 {
2384         struct hashmap_iter iter;
2385         struct strmap_entry *e;
2386         int errs = 0;
2387         int original_cache_nr;
2388
2389         if (strmap_empty(conflicted))
2390                 return 0;
2391
2392         original_cache_nr = index->cache_nr;
2393
2394         /* Put every entry from paths into plist, then sort */
2395         strmap_for_each_entry(conflicted, &iter, e) {
2396                 const char *path = e->key;
2397                 struct conflict_info *ci = e->value;
2398                 int pos;
2399                 struct cache_entry *ce;
2400                 int i;
2401
2402                 VERIFY_CI(ci);
2403
2404                 /*
2405                  * The index will already have a stage=0 entry for this path,
2406                  * because we created an as-merged-as-possible version of the
2407                  * file and checkout() moved the working copy and index over
2408                  * to that version.
2409                  *
2410                  * However, previous iterations through this loop will have
2411                  * added unstaged entries to the end of the cache which
2412                  * ignore the standard alphabetical ordering of cache
2413                  * entries and break invariants needed for index_name_pos()
2414                  * to work.  However, we know the entry we want is before
2415                  * those appended cache entries, so do a temporary swap on
2416                  * cache_nr to only look through entries of interest.
2417                  */
2418                 SWAP(index->cache_nr, original_cache_nr);
2419                 pos = index_name_pos(index, path, strlen(path));
2420                 SWAP(index->cache_nr, original_cache_nr);
2421                 if (pos < 0) {
2422                         if (ci->filemask != 1)
2423                                 BUG("Conflicted %s but nothing in basic working tree or index; this shouldn't happen", path);
2424                         cache_tree_invalidate_path(index, path);
2425                 } else {
2426                         ce = index->cache[pos];
2427
2428                         /*
2429                          * Clean paths with CE_SKIP_WORKTREE set will not be
2430                          * written to the working tree by the unpack_trees()
2431                          * call in checkout().  Our conflicted entries would
2432                          * have appeared clean to that code since we ignored
2433                          * the higher order stages.  Thus, we need override
2434                          * the CE_SKIP_WORKTREE bit and manually write those
2435                          * files to the working disk here.
2436                          *
2437                          * TODO: Implement this CE_SKIP_WORKTREE fixup.
2438                          */
2439
2440                         /*
2441                          * Mark this cache entry for removal and instead add
2442                          * new stage>0 entries corresponding to the
2443                          * conflicts.  If there are many conflicted entries, we
2444                          * want to avoid memmove'ing O(NM) entries by
2445                          * inserting the new entries one at a time.  So,
2446                          * instead, we just add the new cache entries to the
2447                          * end (ignoring normal index requirements on sort
2448                          * order) and sort the index once we're all done.
2449                          */
2450                         ce->ce_flags |= CE_REMOVE;
2451                 }
2452
2453                 for (i = MERGE_BASE; i <= MERGE_SIDE2; i++) {
2454                         struct version_info *vi;
2455                         if (!(ci->filemask & (1ul << i)))
2456                                 continue;
2457                         vi = &ci->stages[i];
2458                         ce = make_cache_entry(index, vi->mode, &vi->oid,
2459                                               path, i+1, 0);
2460                         add_index_entry(index, ce, ADD_CACHE_JUST_APPEND);
2461                 }
2462         }
2463
2464         /*
2465          * Remove the unused cache entries (and invalidate the relevant
2466          * cache-trees), then sort the index entries to get the conflicted
2467          * entries we added to the end into their right locations.
2468          */
2469         remove_marked_cache_entries(index, 1);
2470         QSORT(index->cache, index->cache_nr, cmp_cache_name_compare);
2471
2472         return errs;
2473 }
2474
2475 void merge_switch_to_result(struct merge_options *opt,
2476                             struct tree *head,
2477                             struct merge_result *result,
2478                             int update_worktree_and_index,
2479                             int display_update_msgs)
2480 {
2481         assert(opt->priv == NULL);
2482         if (result->clean >= 0 && update_worktree_and_index) {
2483                 struct merge_options_internal *opti = result->priv;
2484
2485                 if (checkout(opt, head, result->tree)) {
2486                         /* failure to function */
2487                         result->clean = -1;
2488                         return;
2489                 }
2490
2491                 if (record_conflicted_index_entries(opt, opt->repo->index,
2492                                                     &opti->paths,
2493                                                     &opti->conflicted)) {
2494                         /* failure to function */
2495                         result->clean = -1;
2496                         return;
2497                 }
2498         }
2499
2500         if (display_update_msgs) {
2501                 struct merge_options_internal *opti = result->priv;
2502                 struct hashmap_iter iter;
2503                 struct strmap_entry *e;
2504                 struct string_list olist = STRING_LIST_INIT_NODUP;
2505                 int i;
2506
2507                 /* Hack to pre-allocate olist to the desired size */
2508                 ALLOC_GROW(olist.items, strmap_get_size(&opti->output),
2509                            olist.alloc);
2510
2511                 /* Put every entry from output into olist, then sort */
2512                 strmap_for_each_entry(&opti->output, &iter, e) {
2513                         string_list_append(&olist, e->key)->util = e->value;
2514                 }
2515                 string_list_sort(&olist);
2516
2517                 /* Iterate over the items, printing them */
2518                 for (i = 0; i < olist.nr; ++i) {
2519                         struct strbuf *sb = olist.items[i].util;
2520
2521                         printf("%s", sb->buf);
2522                 }
2523                 string_list_clear(&olist, 0);
2524
2525                 /* Also include needed rename limit adjustment now */
2526                 diff_warn_rename_limit("merge.renamelimit",
2527                                        opti->renames.needed_limit, 0);
2528         }
2529
2530         merge_finalize(opt, result);
2531 }
2532
2533 void merge_finalize(struct merge_options *opt,
2534                     struct merge_result *result)
2535 {
2536         struct merge_options_internal *opti = result->priv;
2537
2538         assert(opt->priv == NULL);
2539
2540         clear_or_reinit_internal_opts(opti, 0);
2541         FREE_AND_NULL(opti);
2542 }
2543
2544 /*** Function Grouping: helper functions for merge_incore_*() ***/
2545
2546 static inline void set_commit_tree(struct commit *c, struct tree *t)
2547 {
2548         c->maybe_tree = t;
2549 }
2550
2551 static struct commit *make_virtual_commit(struct repository *repo,
2552                                           struct tree *tree,
2553                                           const char *comment)
2554 {
2555         struct commit *commit = alloc_commit_node(repo);
2556
2557         set_merge_remote_desc(commit, comment, (struct object *)commit);
2558         set_commit_tree(commit, tree);
2559         commit->object.parsed = 1;
2560         return commit;
2561 }
2562
2563 static void merge_start(struct merge_options *opt, struct merge_result *result)
2564 {
2565         struct rename_info *renames;
2566         int i;
2567
2568         /* Sanity checks on opt */
2569         assert(opt->repo);
2570
2571         assert(opt->branch1 && opt->branch2);
2572
2573         assert(opt->detect_directory_renames >= MERGE_DIRECTORY_RENAMES_NONE &&
2574                opt->detect_directory_renames <= MERGE_DIRECTORY_RENAMES_TRUE);
2575         assert(opt->rename_limit >= -1);
2576         assert(opt->rename_score >= 0 && opt->rename_score <= MAX_SCORE);
2577         assert(opt->show_rename_progress >= 0 && opt->show_rename_progress <= 1);
2578
2579         assert(opt->xdl_opts >= 0);
2580         assert(opt->recursive_variant >= MERGE_VARIANT_NORMAL &&
2581                opt->recursive_variant <= MERGE_VARIANT_THEIRS);
2582
2583         /*
2584          * detect_renames, verbosity, buffer_output, and obuf are ignored
2585          * fields that were used by "recursive" rather than "ort" -- but
2586          * sanity check them anyway.
2587          */
2588         assert(opt->detect_renames >= -1 &&
2589                opt->detect_renames <= DIFF_DETECT_COPY);
2590         assert(opt->verbosity >= 0 && opt->verbosity <= 5);
2591         assert(opt->buffer_output <= 2);
2592         assert(opt->obuf.len == 0);
2593
2594         assert(opt->priv == NULL);
2595
2596         /* Default to histogram diff.  Actually, just hardcode it...for now. */
2597         opt->xdl_opts = DIFF_WITH_ALG(opt, HISTOGRAM_DIFF);
2598
2599         /* Initialization of opt->priv, our internal merge data */
2600         opt->priv = xcalloc(1, sizeof(*opt->priv));
2601
2602         /* Initialization of various renames fields */
2603         renames = &opt->priv->renames;
2604         for (i = MERGE_SIDE1; i <= MERGE_SIDE2; i++) {
2605                 strset_init_with_options(&renames->dirs_removed[i],
2606                                          NULL, 0);
2607                 strmap_init_with_options(&renames->dir_rename_count[i],
2608                                          NULL, 1);
2609                 strmap_init_with_options(&renames->dir_renames[i],
2610                                          NULL, 0);
2611         }
2612
2613         /*
2614          * Although we initialize opt->priv->paths with strdup_strings=0,
2615          * that's just to avoid making yet another copy of an allocated
2616          * string.  Putting the entry into paths means we are taking
2617          * ownership, so we will later free it.  paths_to_free is similar.
2618          *
2619          * In contrast, conflicted just has a subset of keys from paths, so
2620          * we don't want to free those (it'd be a duplicate free).
2621          */
2622         strmap_init_with_options(&opt->priv->paths, NULL, 0);
2623         strmap_init_with_options(&opt->priv->conflicted, NULL, 0);
2624         string_list_init(&opt->priv->paths_to_free, 0);
2625
2626         /*
2627          * keys & strbufs in output will sometimes need to outlive "paths",
2628          * so it will have a copy of relevant keys.  It's probably a small
2629          * subset of the overall paths that have special output.
2630          */
2631         strmap_init(&opt->priv->output);
2632 }
2633
2634 /*** Function Grouping: merge_incore_*() and their internal variants ***/
2635
2636 /*
2637  * Originally from merge_trees_internal(); heavily adapted, though.
2638  */
2639 static void merge_ort_nonrecursive_internal(struct merge_options *opt,
2640                                             struct tree *merge_base,
2641                                             struct tree *side1,
2642                                             struct tree *side2,
2643                                             struct merge_result *result)
2644 {
2645         struct object_id working_tree_oid;
2646
2647         if (collect_merge_info(opt, merge_base, side1, side2) != 0) {
2648                 /*
2649                  * TRANSLATORS: The %s arguments are: 1) tree hash of a merge
2650                  * base, and 2-3) the trees for the two trees we're merging.
2651                  */
2652                 err(opt, _("collecting merge info failed for trees %s, %s, %s"),
2653                     oid_to_hex(&merge_base->object.oid),
2654                     oid_to_hex(&side1->object.oid),
2655                     oid_to_hex(&side2->object.oid));
2656                 result->clean = -1;
2657                 return;
2658         }
2659
2660         result->clean = detect_and_process_renames(opt, merge_base,
2661                                                    side1, side2);
2662         process_entries(opt, &working_tree_oid);
2663
2664         /* Set return values */
2665         result->tree = parse_tree_indirect(&working_tree_oid);
2666         /* existence of conflicted entries implies unclean */
2667         result->clean &= strmap_empty(&opt->priv->conflicted);
2668         if (!opt->priv->call_depth) {
2669                 result->priv = opt->priv;
2670                 opt->priv = NULL;
2671         }
2672 }
2673
2674 /*
2675  * Originally from merge_recursive_internal(); somewhat adapted, though.
2676  */
2677 static void merge_ort_internal(struct merge_options *opt,
2678                                struct commit_list *merge_bases,
2679                                struct commit *h1,
2680                                struct commit *h2,
2681                                struct merge_result *result)
2682 {
2683         struct commit_list *iter;
2684         struct commit *merged_merge_bases;
2685         const char *ancestor_name;
2686         struct strbuf merge_base_abbrev = STRBUF_INIT;
2687
2688         if (!merge_bases) {
2689                 merge_bases = get_merge_bases(h1, h2);
2690                 /* See merge-ort.h:merge_incore_recursive() declaration NOTE */
2691                 merge_bases = reverse_commit_list(merge_bases);
2692         }
2693
2694         merged_merge_bases = pop_commit(&merge_bases);
2695         if (merged_merge_bases == NULL) {
2696                 /* if there is no common ancestor, use an empty tree */
2697                 struct tree *tree;
2698
2699                 tree = lookup_tree(opt->repo, opt->repo->hash_algo->empty_tree);
2700                 merged_merge_bases = make_virtual_commit(opt->repo, tree,
2701                                                          "ancestor");
2702                 ancestor_name = "empty tree";
2703         } else if (merge_bases) {
2704                 ancestor_name = "merged common ancestors";
2705         } else {
2706                 strbuf_add_unique_abbrev(&merge_base_abbrev,
2707                                          &merged_merge_bases->object.oid,
2708                                          DEFAULT_ABBREV);
2709                 ancestor_name = merge_base_abbrev.buf;
2710         }
2711
2712         for (iter = merge_bases; iter; iter = iter->next) {
2713                 const char *saved_b1, *saved_b2;
2714                 struct commit *prev = merged_merge_bases;
2715
2716                 opt->priv->call_depth++;
2717                 /*
2718                  * When the merge fails, the result contains files
2719                  * with conflict markers. The cleanness flag is
2720                  * ignored (unless indicating an error), it was never
2721                  * actually used, as result of merge_trees has always
2722                  * overwritten it: the committed "conflicts" were
2723                  * already resolved.
2724                  */
2725                 saved_b1 = opt->branch1;
2726                 saved_b2 = opt->branch2;
2727                 opt->branch1 = "Temporary merge branch 1";
2728                 opt->branch2 = "Temporary merge branch 2";
2729                 merge_ort_internal(opt, NULL, prev, iter->item, result);
2730                 if (result->clean < 0)
2731                         return;
2732                 opt->branch1 = saved_b1;
2733                 opt->branch2 = saved_b2;
2734                 opt->priv->call_depth--;
2735
2736                 merged_merge_bases = make_virtual_commit(opt->repo,
2737                                                          result->tree,
2738                                                          "merged tree");
2739                 commit_list_insert(prev, &merged_merge_bases->parents);
2740                 commit_list_insert(iter->item,
2741                                    &merged_merge_bases->parents->next);
2742
2743                 clear_or_reinit_internal_opts(opt->priv, 1);
2744         }
2745
2746         opt->ancestor = ancestor_name;
2747         merge_ort_nonrecursive_internal(opt,
2748                                         repo_get_commit_tree(opt->repo,
2749                                                              merged_merge_bases),
2750                                         repo_get_commit_tree(opt->repo, h1),
2751                                         repo_get_commit_tree(opt->repo, h2),
2752                                         result);
2753         strbuf_release(&merge_base_abbrev);
2754         opt->ancestor = NULL;  /* avoid accidental re-use of opt->ancestor */
2755 }
2756
2757 void merge_incore_nonrecursive(struct merge_options *opt,
2758                                struct tree *merge_base,
2759                                struct tree *side1,
2760                                struct tree *side2,
2761                                struct merge_result *result)
2762 {
2763         assert(opt->ancestor != NULL);
2764         merge_start(opt, result);
2765         merge_ort_nonrecursive_internal(opt, merge_base, side1, side2, result);
2766 }
2767
2768 void merge_incore_recursive(struct merge_options *opt,
2769                             struct commit_list *merge_bases,
2770                             struct commit *side1,
2771                             struct commit *side2,
2772                             struct merge_result *result)
2773 {
2774         /* We set the ancestor label based on the merge_bases */
2775         assert(opt->ancestor == NULL);
2776
2777         merge_start(opt, result);
2778         merge_ort_internal(opt, merge_bases, side1, side2, result);
2779 }