[PATCH] Parallelize the pull algorithm
[git/gitweb.git] / read-tree.c
blobce2bb88f0aae8919f188a0dbc5f3338523151740
1 /*
2 * GIT - The information manager from hell
4 * Copyright (C) Linus Torvalds, 2005
5 */
6 #include "cache.h"
8 static int stage = 0;
9 static int update = 0;
11 static int unpack_tree(unsigned char *sha1)
13 void *buffer;
14 unsigned long size;
15 int ret;
17 buffer = read_object_with_reference(sha1, "tree", &size, NULL);
18 if (!buffer)
19 return -1;
20 ret = read_tree(buffer, size, stage, NULL);
21 free(buffer);
22 return ret;
25 static int path_matches(struct cache_entry *a, struct cache_entry *b)
27 int len = ce_namelen(a);
28 return ce_namelen(b) == len &&
29 !memcmp(a->name, b->name, len);
32 static int same(struct cache_entry *a, struct cache_entry *b)
34 return a->ce_mode == b->ce_mode &&
35 !memcmp(a->sha1, b->sha1, 20);
40 * This removes all trivial merges that don't change the tree
41 * and collapses them to state 0.
43 static struct cache_entry *merge_entries(struct cache_entry *a,
44 struct cache_entry *b,
45 struct cache_entry *c)
48 * Ok, all three entries describe the same
49 * filename, but maybe the contents or file
50 * mode have changed?
52 * The trivial cases end up being the ones where two
53 * out of three files are the same:
54 * - both destinations the same, trivially take either
55 * - one of the destination versions hasn't changed,
56 * take the other.
58 * The "all entries exactly the same" case falls out as
59 * a special case of any of the "two same" cases.
61 * Here "a" is "original", and "b" and "c" are the two
62 * trees we are merging.
64 if (a && b && c) {
65 if (same(b,c))
66 return c;
67 if (same(a,b))
68 return c;
69 if (same(a,c))
70 return b;
72 return NULL;
76 * When a CE gets turned into an unmerged entry, we
77 * want it to be up-to-date
79 static void verify_uptodate(struct cache_entry *ce)
81 struct stat st;
83 if (!lstat(ce->name, &st)) {
84 unsigned changed = ce_match_stat(ce, &st);
85 if (!changed)
86 return;
87 errno = 0;
89 if (errno == ENOENT)
90 return;
91 die("Entry '%s' not uptodate. Cannot merge.", ce->name);
95 * If the old tree contained a CE that isn't even in the
96 * result, that's always a problem, regardless of whether
97 * it's up-to-date or not (ie it can be a file that we
98 * have updated but not committed yet).
100 static void reject_merge(struct cache_entry *ce)
102 die("Entry '%s' would be overwritten by merge. Cannot merge.", ce->name);
105 static int merged_entry_internal(struct cache_entry *merge, struct cache_entry *old, struct cache_entry **dst, int allow_dirty)
107 merge->ce_flags |= htons(CE_UPDATE);
108 if (old) {
110 * See if we can re-use the old CE directly?
111 * That way we get the uptodate stat info.
113 * This also removes the UPDATE flag on
114 * a match.
116 if (same(old, merge)) {
117 *merge = *old;
118 } else if (!allow_dirty) {
119 verify_uptodate(old);
122 merge->ce_flags &= ~htons(CE_STAGEMASK);
123 *dst++ = merge;
124 return 1;
127 static int merged_entry_allow_dirty(struct cache_entry *merge, struct cache_entry *old, struct cache_entry **dst)
129 return merged_entry_internal(merge, old, dst, 1);
132 static int merged_entry(struct cache_entry *merge, struct cache_entry *old, struct cache_entry **dst)
134 return merged_entry_internal(merge, old, dst, 0);
137 static int deleted_entry(struct cache_entry *ce, struct cache_entry *old, struct cache_entry **dst)
139 if (old)
140 verify_uptodate(old);
141 ce->ce_mode = 0;
142 *dst++ = ce;
143 return 1;
146 static int causes_df_conflict(struct cache_entry *ce, int stage,
147 struct cache_entry **dst_,
148 struct cache_entry **next_,
149 int tail)
151 /* This is called during the merge operation and walking
152 * the active_cache[] array is messy, because it is in the
153 * middle of overlapping copy operation. The invariants
154 * are:
155 * (1) active_cache points at the first (zeroth) entry.
156 * (2) up to dst pointer are resolved entries.
157 * (3) from the next pointer (head-inclusive) to the tail
158 * of the active_cache array have the remaining paths
159 * to be processed. There can be a gap between dst
160 * and next. Note that next is called "src" in the
161 * merge_cache() function, and tail is the original
162 * end of active_cache array when merge_cache() started.
163 * (4) the path corresponding to *ce is not found in (2)
164 * or (3). It is in the gap.
166 * active_cache -----......+++++++++++++.
167 * ^dst ^next ^tail
169 int i, next, dst;
170 const char *path = ce->name;
171 int namelen = ce_namelen(ce);
173 next = next_ - active_cache;
174 dst = dst_ - active_cache;
176 for (i = 0; i < tail; i++) {
177 int entlen, len;
178 const char *one, *two;
179 if (dst <= i && i < next)
180 continue;
181 ce = active_cache[i];
182 if (ce_stage(ce) != stage)
183 continue;
184 /* If ce->name is a prefix of path, then path is a file
185 * that hangs underneath ce->name, which is bad.
186 * If path is a prefix of ce->name, then it is the
187 * other way around which also is bad.
189 entlen = ce_namelen(ce);
190 if (namelen == entlen)
191 continue;
192 if (namelen < entlen) {
193 len = namelen;
194 one = path;
195 two = ce->name;
196 } else {
197 len = entlen;
198 one = ce->name;
199 two = path;
201 if (memcmp(one, two, len))
202 continue;
203 if (two[len] == '/')
204 return 1;
206 return 0;
209 static int threeway_merge(struct cache_entry *stages[4],
210 struct cache_entry **dst,
211 struct cache_entry **next, int tail)
213 struct cache_entry *old = stages[0];
214 struct cache_entry *a = stages[1], *b = stages[2], *c = stages[3];
215 struct cache_entry *merge;
216 int count;
218 /* #5ALT */
219 if (!a && b && c && same(b, c)) {
220 if (old && !same(b, old))
221 return -1;
222 return merged_entry_allow_dirty(b, old, dst);
224 /* #2ALT and #3ALT */
225 if (!a && (!!b != !!c)) {
227 * The reason we need to worry about directory/file
228 * conflicts only in #2ALT and #3ALT case is this:
230 * (1) For all other cases that read-tree internally
231 * resolves a path, we always have such a path in
232 * *both* stage2 and stage3 when we begin.
233 * Traditionally, the behaviour has been even
234 * stricter and we did not resolve a path without
235 * initially being in all of stage1, 2, and 3.
237 * (2) When read-tree finishes, all resolved paths (i.e.
238 * the paths that are in stage0) must have come from
239 * either stage2 or stage3. It is not possible to
240 * have a stage0 path as a result of a merge if
241 * neither stage2 nor stage3 had that path.
243 * (3) It is guaranteed that just after reading the
244 * stages, each stage cannot have directory/file
245 * conflicts on its own, because they are populated
246 * by reading hierarchy of a tree. Combined with
247 * (1) and (2) above, this means that no matter what
248 * combination of paths we take from stage2 and
249 * stage3 as a result of a merge, they cannot cause
250 * a directory/file conflict situation (otherwise
251 * the "guilty" path would have already had such a
252 * conflict in the original stage, either stage2
253 * or stage3). Although its stage2 is synthesized
254 * by overlaying the current index on top of "our
255 * head" tree, --emu23 case also has this guarantee,
256 * by calling add_cache_entry() to create such stage2
257 * entries.
259 * (4) Only #2ALT and #3ALT lack the guarantee (1).
260 * They resolve paths that exist only in stage2
261 * or stage3. The stage2 tree may have a file DF
262 * while stage3 tree may have a file DF/DF. If
263 * #2ALT and #3ALT rules happen to apply to both
264 * of them, we would end up having DF (coming from
265 * stage2) and DF/DF (from stage3) in the result.
266 * When we attempt to resolve a path that exists
267 * only in stage2, we need to make sure there is
268 * no path that would conflict with it in stage3
269 * and vice versa.
271 if (c) { /* #2ALT */
272 if (!causes_df_conflict(c, 2, dst, next, tail) &&
273 (!old || same(c, old)))
274 return merged_entry_allow_dirty(c, old, dst);
276 else { /* #3ALT */
277 if (!causes_df_conflict(b, 3, dst, next, tail) &&
278 (!old || same(b, old)))
279 return merged_entry_allow_dirty(b, old, dst);
281 /* otherwise we will apply the original rule */
283 /* #14ALT */
284 if (a && b && c && same(a, b) && !same(a, c)) {
285 if (old && same(old, c))
286 return merged_entry_allow_dirty(c, old, dst);
287 /* otherwise the regular rule applies */
290 * If we have an entry in the index cache ("old"), then we want
291 * to make sure that it matches any entries in stage 2 ("first
292 * branch", aka "b").
294 if (old) {
295 if (!b || !same(old, b))
296 return -1;
298 merge = merge_entries(a, b, c);
299 if (merge)
300 return merged_entry(merge, old, dst);
301 if (old)
302 verify_uptodate(old);
303 count = 0;
304 if (a) { *dst++ = a; count++; }
305 if (b) { *dst++ = b; count++; }
306 if (c) { *dst++ = c; count++; }
307 return count;
311 * Two-way merge.
313 * The rule is to "carry forward" what is in the index without losing
314 * information across a "fast forward", favoring a successful merge
315 * over a merge failure when it makes sense. For details of the
316 * "carry forward" rule, please see <Documentation/git-read-tree.txt>.
319 static int twoway_merge(struct cache_entry **src, struct cache_entry **dst,
320 struct cache_entry **next, int tail)
322 struct cache_entry *current = src[0];
323 struct cache_entry *oldtree = src[1], *newtree = src[2];
325 if (src[3])
326 return -1;
328 if (current) {
329 if ((!oldtree && !newtree) || /* 4 and 5 */
330 (!oldtree && newtree &&
331 same(current, newtree)) || /* 6 and 7 */
332 (oldtree && newtree &&
333 same(oldtree, newtree)) || /* 14 and 15 */
334 (oldtree && newtree &&
335 !same(oldtree, newtree) && /* 18 and 19*/
336 same(current, newtree))) {
337 *dst++ = current;
338 return 1;
340 else if (oldtree && !newtree && same(current, oldtree)) {
341 /* 10 or 11 */
342 return deleted_entry(oldtree, current, dst);
344 else if (oldtree && newtree &&
345 same(current, oldtree) && !same(current, newtree)) {
346 /* 20 or 21 */
347 return merged_entry(newtree, current, dst);
349 else
350 /* all other failures */
351 return -1;
353 else if (newtree)
354 return merged_entry(newtree, current, dst);
355 else
356 return deleted_entry(oldtree, current, dst);
360 * Two-way merge emulated with three-way merge.
362 * This treats "read-tree -m H M" by transforming it internally
363 * into "read-tree -m H I+H M", where I+H is a tree that would
364 * contain the contents of the current index file, overlayed on
365 * top of H. Unlike the traditional two-way merge, this leaves
366 * the stages in the resulting index file and lets the user resolve
367 * the merge conflicts using standard tools for three-way merge.
369 * This function is just to set-up such an arrangement, and the
370 * actual merge uses threeway_merge() function.
372 static void setup_emu23(void)
374 /* stage0 contains I, stage1 H, stage2 M.
375 * move stage2 to stage3, and create stage2 entries
376 * by scanning stage0 and stage1 entries.
378 int i, namelen, size;
379 struct cache_entry *ce, *stage2;
381 for (i = 0; i < active_nr; i++) {
382 ce = active_cache[i];
383 if (ce_stage(ce) != 2)
384 continue;
385 /* hoist them up to stage 3 */
386 namelen = ce_namelen(ce);
387 ce->ce_flags = create_ce_flags(namelen, 3);
390 for (i = 0; i < active_nr; i++) {
391 ce = active_cache[i];
392 if (ce_stage(ce) > 1)
393 continue;
394 namelen = ce_namelen(ce);
395 size = cache_entry_size(namelen);
396 stage2 = xmalloc(size);
397 memcpy(stage2, ce, size);
398 stage2->ce_flags = create_ce_flags(namelen, 2);
399 if (add_cache_entry(stage2, ADD_CACHE_OK_TO_ADD) < 0)
400 die("cannot merge index and our head tree");
402 /* We are done with this name, so skip to next name */
403 while (i < active_nr &&
404 ce_namelen(active_cache[i]) == namelen &&
405 !memcmp(active_cache[i]->name, ce->name, namelen))
406 i++;
407 i--; /* compensate for the loop control */
412 * One-way merge.
414 * The rule is:
415 * - take the stat information from stage0, take the data from stage1
417 static int oneway_merge(struct cache_entry **src, struct cache_entry **dst,
418 struct cache_entry **next, int tail)
420 struct cache_entry *old = src[0];
421 struct cache_entry *a = src[1];
423 if (src[2] || src[3])
424 return -1;
426 if (!a)
427 return 0;
428 if (old && same(old, a)) {
429 *dst++ = old;
430 return 1;
432 return merged_entry(a, NULL, dst);
435 static void check_updates(struct cache_entry **src, int nr)
437 static struct checkout state = {
438 .base_dir = "",
439 .force = 1,
440 .quiet = 1,
441 .refresh_cache = 1,
443 unsigned short mask = htons(CE_UPDATE);
444 while (nr--) {
445 struct cache_entry *ce = *src++;
446 if (!ce->ce_mode) {
447 if (update)
448 unlink(ce->name);
449 continue;
451 if (ce->ce_flags & mask) {
452 ce->ce_flags &= ~mask;
453 if (update)
454 checkout_entry(ce, &state);
459 typedef int (*merge_fn_t)(struct cache_entry **, struct cache_entry **, struct cache_entry **, int);
461 static void merge_cache(struct cache_entry **src, int nr, merge_fn_t fn)
463 struct cache_entry **dst = src;
464 int tail = nr;
466 while (nr) {
467 int entries;
468 struct cache_entry *name, *ce, *stages[4] = { NULL, };
470 name = ce = *src;
471 for (;;) {
472 int stage = ce_stage(ce);
473 stages[stage] = ce;
474 ce = *++src;
475 active_nr--;
476 if (!--nr)
477 break;
478 if (!path_matches(ce, name))
479 break;
482 entries = fn(stages, dst, src, tail);
483 if (entries < 0)
484 reject_merge(name);
485 dst += entries;
486 active_nr += entries;
488 check_updates(active_cache, active_nr);
491 static int read_cache_unmerged(void)
493 int i, deleted;
494 struct cache_entry **dst;
496 read_cache();
497 dst = active_cache;
498 deleted = 0;
499 for (i = 0; i < active_nr; i++) {
500 struct cache_entry *ce = active_cache[i];
501 if (ce_stage(ce)) {
502 deleted++;
503 continue;
505 if (deleted)
506 *dst = ce;
507 dst++;
509 active_nr -= deleted;
510 return deleted;
513 static const char read_tree_usage[] = "git-read-tree (<sha> | -m [-u] <sha1> [<sha2> [<sha3>]])";
515 static struct cache_file cache_file;
517 int main(int argc, char **argv)
519 int i, newfd, merge, reset, emu23;
520 unsigned char sha1[20];
522 newfd = hold_index_file_for_update(&cache_file, get_index_file());
523 if (newfd < 0)
524 die("unable to create new cachefile");
526 merge = 0;
527 reset = 0;
528 emu23 = 0;
529 for (i = 1; i < argc; i++) {
530 const char *arg = argv[i];
532 /* "-u" means "update", meaning that a merge will update the working directory */
533 if (!strcmp(arg, "-u")) {
534 update = 1;
535 continue;
538 /* This differs from "-m" in that we'll silently ignore unmerged entries */
539 if (!strcmp(arg, "--reset")) {
540 if (stage || merge || emu23)
541 usage(read_tree_usage);
542 reset = 1;
543 merge = 1;
544 stage = 1;
545 read_cache_unmerged();
546 continue;
549 /* "-m" stands for "merge", meaning we start in stage 1 */
550 if (!strcmp(arg, "-m")) {
551 if (stage || merge || emu23)
552 usage(read_tree_usage);
553 if (read_cache_unmerged())
554 die("you need to resolve your current index first");
555 stage = 1;
556 merge = 1;
557 continue;
560 /* "-emu23" uses 3-way merge logic to perform fast-forward */
561 if (!strcmp(arg, "--emu23")) {
562 if (stage || merge || emu23)
563 usage(read_tree_usage);
564 if (read_cache_unmerged())
565 die("you need to resolve your current index first");
566 merge = emu23 = stage = 1;
567 continue;
570 if (get_sha1(arg, sha1) < 0)
571 usage(read_tree_usage);
572 if (stage > 3)
573 usage(read_tree_usage);
574 if (unpack_tree(sha1) < 0)
575 die("failed to unpack tree object %s", arg);
576 stage++;
578 if (update && !merge)
579 usage(read_tree_usage);
580 if (merge) {
581 static const merge_fn_t merge_function[] = {
582 [1] = oneway_merge,
583 [2] = twoway_merge,
584 [3] = threeway_merge,
586 merge_fn_t fn;
588 if (stage < 2 || stage > 4)
589 die("just how do you expect me to merge %d trees?", stage-1);
590 if (emu23 && stage != 3)
591 die("--emu23 takes only two trees");
592 fn = merge_function[stage-1];
593 if (stage == 3 && emu23) {
594 setup_emu23();
595 fn = merge_function[3];
597 merge_cache(active_cache, active_nr, fn);
599 if (write_cache(newfd, active_cache, active_nr) ||
600 commit_index_file(&cache_file))
601 die("unable to write new index file");
602 return 0;