dir.h: move DTYPE defines from cache.h
[git.git] / split-index.c
blob3fc4e91485a775c241a07dacc3cb3fca3272f420
1 #include "cache.h"
2 #include "alloc.h"
3 #include "gettext.h"
4 #include "mem-pool.h"
5 #include "split-index.h"
6 #include "strbuf.h"
7 #include "ewah/ewok.h"
9 struct split_index *init_split_index(struct index_state *istate)
11 if (!istate->split_index) {
12 if (istate->sparse_index)
13 die(_("cannot use split index with a sparse index"));
15 CALLOC_ARRAY(istate->split_index, 1);
16 istate->split_index->refcount = 1;
18 return istate->split_index;
21 int read_link_extension(struct index_state *istate,
22 const void *data_, unsigned long sz)
24 const unsigned char *data = data_;
25 struct split_index *si;
26 int ret;
28 if (sz < the_hash_algo->rawsz)
29 return error("corrupt link extension (too short)");
30 si = init_split_index(istate);
31 oidread(&si->base_oid, data);
32 data += the_hash_algo->rawsz;
33 sz -= the_hash_algo->rawsz;
34 if (!sz)
35 return 0;
36 si->delete_bitmap = ewah_new();
37 ret = ewah_read_mmap(si->delete_bitmap, data, sz);
38 if (ret < 0)
39 return error("corrupt delete bitmap in link extension");
40 data += ret;
41 sz -= ret;
42 si->replace_bitmap = ewah_new();
43 ret = ewah_read_mmap(si->replace_bitmap, data, sz);
44 if (ret < 0)
45 return error("corrupt replace bitmap in link extension");
46 if (ret != sz)
47 return error("garbage at the end of link extension");
48 return 0;
51 int write_link_extension(struct strbuf *sb,
52 struct index_state *istate)
54 struct split_index *si = istate->split_index;
55 strbuf_add(sb, si->base_oid.hash, the_hash_algo->rawsz);
56 if (!si->delete_bitmap && !si->replace_bitmap)
57 return 0;
58 ewah_serialize_strbuf(si->delete_bitmap, sb);
59 ewah_serialize_strbuf(si->replace_bitmap, sb);
60 return 0;
63 static void mark_base_index_entries(struct index_state *base)
65 int i;
67 * To keep track of the shared entries between
68 * istate->base->cache[] and istate->cache[], base entry
69 * position is stored in each base entry. All positions start
70 * from 1 instead of 0, which is reserved to say "this is a new
71 * entry".
73 for (i = 0; i < base->cache_nr; i++)
74 base->cache[i]->index = i + 1;
77 void move_cache_to_base_index(struct index_state *istate)
79 struct split_index *si = istate->split_index;
80 int i;
83 * If there was a previous base index, then transfer ownership of allocated
84 * entries to the parent index.
86 if (si->base &&
87 si->base->ce_mem_pool) {
89 if (!istate->ce_mem_pool) {
90 istate->ce_mem_pool = xmalloc(sizeof(struct mem_pool));
91 mem_pool_init(istate->ce_mem_pool, 0);
94 mem_pool_combine(istate->ce_mem_pool, istate->split_index->base->ce_mem_pool);
97 ALLOC_ARRAY(si->base, 1);
98 index_state_init(si->base, istate->repo);
99 si->base->version = istate->version;
100 /* zero timestamp disables racy test in ce_write_index() */
101 si->base->timestamp = istate->timestamp;
102 ALLOC_GROW(si->base->cache, istate->cache_nr, si->base->cache_alloc);
103 si->base->cache_nr = istate->cache_nr;
106 * The mem_pool needs to move with the allocated entries.
108 si->base->ce_mem_pool = istate->ce_mem_pool;
109 istate->ce_mem_pool = NULL;
111 COPY_ARRAY(si->base->cache, istate->cache, istate->cache_nr);
112 mark_base_index_entries(si->base);
113 for (i = 0; i < si->base->cache_nr; i++)
114 si->base->cache[i]->ce_flags &= ~CE_UPDATE_IN_BASE;
117 static void mark_entry_for_delete(size_t pos, void *data)
119 struct index_state *istate = data;
120 if (pos >= istate->cache_nr)
121 die("position for delete %d exceeds base index size %d",
122 (int)pos, istate->cache_nr);
123 istate->cache[pos]->ce_flags |= CE_REMOVE;
124 istate->split_index->nr_deletions++;
127 static void replace_entry(size_t pos, void *data)
129 struct index_state *istate = data;
130 struct split_index *si = istate->split_index;
131 struct cache_entry *dst, *src;
133 if (pos >= istate->cache_nr)
134 die("position for replacement %d exceeds base index size %d",
135 (int)pos, istate->cache_nr);
136 if (si->nr_replacements >= si->saved_cache_nr)
137 die("too many replacements (%d vs %d)",
138 si->nr_replacements, si->saved_cache_nr);
139 dst = istate->cache[pos];
140 if (dst->ce_flags & CE_REMOVE)
141 die("entry %d is marked as both replaced and deleted",
142 (int)pos);
143 src = si->saved_cache[si->nr_replacements];
144 if (ce_namelen(src))
145 die("corrupt link extension, entry %d should have "
146 "zero length name", (int)pos);
147 src->index = pos + 1;
148 src->ce_flags |= CE_UPDATE_IN_BASE;
149 src->ce_namelen = dst->ce_namelen;
150 copy_cache_entry(dst, src);
151 discard_cache_entry(src);
152 si->nr_replacements++;
155 void merge_base_index(struct index_state *istate)
157 struct split_index *si = istate->split_index;
158 unsigned int i;
160 mark_base_index_entries(si->base);
162 si->saved_cache = istate->cache;
163 si->saved_cache_nr = istate->cache_nr;
164 istate->cache_nr = si->base->cache_nr;
165 istate->cache = NULL;
166 istate->cache_alloc = 0;
167 ALLOC_GROW(istate->cache, istate->cache_nr, istate->cache_alloc);
168 COPY_ARRAY(istate->cache, si->base->cache, istate->cache_nr);
170 si->nr_deletions = 0;
171 si->nr_replacements = 0;
172 ewah_each_bit(si->replace_bitmap, replace_entry, istate);
173 ewah_each_bit(si->delete_bitmap, mark_entry_for_delete, istate);
174 if (si->nr_deletions)
175 remove_marked_cache_entries(istate, 0);
177 for (i = si->nr_replacements; i < si->saved_cache_nr; i++) {
178 if (!ce_namelen(si->saved_cache[i]))
179 die("corrupt link extension, entry %d should "
180 "have non-zero length name", i);
181 add_index_entry(istate, si->saved_cache[i],
182 ADD_CACHE_OK_TO_ADD |
183 ADD_CACHE_KEEP_CACHE_TREE |
185 * we may have to replay what
186 * merge-recursive.c:update_stages()
187 * does, which has this flag on
189 ADD_CACHE_SKIP_DFCHECK);
190 si->saved_cache[i] = NULL;
193 ewah_free(si->delete_bitmap);
194 ewah_free(si->replace_bitmap);
195 FREE_AND_NULL(si->saved_cache);
196 si->delete_bitmap = NULL;
197 si->replace_bitmap = NULL;
198 si->saved_cache_nr = 0;
202 * Compare most of the fields in two cache entries, i.e. all except the
203 * hashmap_entry and the name.
205 static int compare_ce_content(struct cache_entry *a, struct cache_entry *b)
207 const unsigned int ondisk_flags = CE_STAGEMASK | CE_VALID |
208 CE_EXTENDED_FLAGS;
209 unsigned int ce_flags = a->ce_flags;
210 unsigned int base_flags = b->ce_flags;
211 int ret;
213 /* only on-disk flags matter */
214 a->ce_flags &= ondisk_flags;
215 b->ce_flags &= ondisk_flags;
216 ret = memcmp(&a->ce_stat_data, &b->ce_stat_data,
217 offsetof(struct cache_entry, name) -
218 offsetof(struct cache_entry, oid)) ||
219 !oideq(&a->oid, &b->oid);
220 a->ce_flags = ce_flags;
221 b->ce_flags = base_flags;
223 return ret;
226 void prepare_to_write_split_index(struct index_state *istate)
228 struct split_index *si = init_split_index(istate);
229 struct cache_entry **entries = NULL, *ce;
230 int i, nr_entries = 0, nr_alloc = 0;
232 si->delete_bitmap = ewah_new();
233 si->replace_bitmap = ewah_new();
235 if (si->base) {
236 /* Go through istate->cache[] and mark CE_MATCHED to
237 * entry with positive index. We'll go through
238 * base->cache[] later to delete all entries in base
239 * that are not marked with either CE_MATCHED or
240 * CE_UPDATE_IN_BASE. If istate->cache[i] is a
241 * duplicate, deduplicate it.
243 for (i = 0; i < istate->cache_nr; i++) {
244 struct cache_entry *base;
245 ce = istate->cache[i];
246 if (!ce->index) {
248 * During simple update index operations this
249 * is a cache entry that is not present in
250 * the shared index. It will be added to the
251 * split index.
253 * However, it might also represent a file
254 * that already has a cache entry in the
255 * shared index, but a new index has just
256 * been constructed by unpack_trees(), and
257 * this entry now refers to different content
258 * than what was recorded in the original
259 * index, e.g. during 'read-tree -m HEAD^' or
260 * 'checkout HEAD^'. In this case the
261 * original entry in the shared index will be
262 * marked as deleted, and this entry will be
263 * added to the split index.
265 continue;
267 if (ce->index > si->base->cache_nr) {
268 BUG("ce refers to a shared ce at %d, which is beyond the shared index size %d",
269 ce->index, si->base->cache_nr);
271 ce->ce_flags |= CE_MATCHED; /* or "shared" */
272 base = si->base->cache[ce->index - 1];
273 if (ce == base) {
274 /* The entry is present in the shared index. */
275 if (ce->ce_flags & CE_UPDATE_IN_BASE) {
277 * Already marked for inclusion in
278 * the split index, either because
279 * the corresponding file was
280 * modified and the cached stat data
281 * was refreshed, or because there
282 * is already a replacement entry in
283 * the split index.
284 * Nothing more to do here.
286 } else if (!ce_uptodate(ce) &&
287 is_racy_timestamp(istate, ce)) {
289 * A racily clean cache entry stored
290 * only in the shared index: it must
291 * be added to the split index, so
292 * the subsequent do_write_index()
293 * can smudge its stat data.
295 ce->ce_flags |= CE_UPDATE_IN_BASE;
296 } else {
298 * The entry is only present in the
299 * shared index and it was not
300 * refreshed.
301 * Just leave it there.
304 continue;
306 if (ce->ce_namelen != base->ce_namelen ||
307 strcmp(ce->name, base->name)) {
308 ce->index = 0;
309 continue;
312 * This is the copy of a cache entry that is present
313 * in the shared index, created by unpack_trees()
314 * while it constructed a new index.
316 if (ce->ce_flags & CE_UPDATE_IN_BASE) {
318 * Already marked for inclusion in the split
319 * index, either because the corresponding
320 * file was modified and the cached stat data
321 * was refreshed, or because the original
322 * entry already had a replacement entry in
323 * the split index.
324 * Nothing to do.
326 } else if (!ce_uptodate(ce) &&
327 is_racy_timestamp(istate, ce)) {
329 * A copy of a racily clean cache entry from
330 * the shared index. It must be added to
331 * the split index, so the subsequent
332 * do_write_index() can smudge its stat data.
334 ce->ce_flags |= CE_UPDATE_IN_BASE;
335 } else {
337 * Thoroughly compare the cached data to see
338 * whether it should be marked for inclusion
339 * in the split index.
341 * This comparison might be unnecessary, as
342 * code paths modifying the cached data do
343 * set CE_UPDATE_IN_BASE as well.
345 if (compare_ce_content(ce, base))
346 ce->ce_flags |= CE_UPDATE_IN_BASE;
348 discard_cache_entry(base);
349 si->base->cache[ce->index - 1] = ce;
351 for (i = 0; i < si->base->cache_nr; i++) {
352 ce = si->base->cache[i];
353 if ((ce->ce_flags & CE_REMOVE) ||
354 !(ce->ce_flags & CE_MATCHED))
355 ewah_set(si->delete_bitmap, i);
356 else if (ce->ce_flags & CE_UPDATE_IN_BASE) {
357 ewah_set(si->replace_bitmap, i);
358 ce->ce_flags |= CE_STRIP_NAME;
359 ALLOC_GROW(entries, nr_entries+1, nr_alloc);
360 entries[nr_entries++] = ce;
362 if (is_null_oid(&ce->oid))
363 istate->drop_cache_tree = 1;
367 for (i = 0; i < istate->cache_nr; i++) {
368 ce = istate->cache[i];
369 if ((!si->base || !ce->index) && !(ce->ce_flags & CE_REMOVE)) {
370 assert(!(ce->ce_flags & CE_STRIP_NAME));
371 ALLOC_GROW(entries, nr_entries+1, nr_alloc);
372 entries[nr_entries++] = ce;
374 ce->ce_flags &= ~CE_MATCHED;
378 * take cache[] out temporarily, put entries[] in its place
379 * for writing
381 si->saved_cache = istate->cache;
382 si->saved_cache_nr = istate->cache_nr;
383 istate->cache = entries;
384 istate->cache_nr = nr_entries;
387 void finish_writing_split_index(struct index_state *istate)
389 struct split_index *si = init_split_index(istate);
391 ewah_free(si->delete_bitmap);
392 ewah_free(si->replace_bitmap);
393 si->delete_bitmap = NULL;
394 si->replace_bitmap = NULL;
395 free(istate->cache);
396 istate->cache = si->saved_cache;
397 istate->cache_nr = si->saved_cache_nr;
400 void discard_split_index(struct index_state *istate)
402 struct split_index *si = istate->split_index;
403 if (!si)
404 return;
405 istate->split_index = NULL;
406 si->refcount--;
407 if (si->refcount)
408 return;
409 if (si->base) {
410 discard_index(si->base);
411 free(si->base);
413 free(si);
416 void save_or_free_index_entry(struct index_state *istate, struct cache_entry *ce)
418 if (ce->index &&
419 istate->split_index &&
420 istate->split_index->base &&
421 ce->index <= istate->split_index->base->cache_nr &&
422 ce == istate->split_index->base->cache[ce->index - 1])
423 ce->ce_flags |= CE_REMOVE;
424 else
425 discard_cache_entry(ce);
428 void replace_index_entry_in_base(struct index_state *istate,
429 struct cache_entry *old_entry,
430 struct cache_entry *new_entry)
432 if (old_entry->index &&
433 istate->split_index &&
434 istate->split_index->base &&
435 old_entry->index <= istate->split_index->base->cache_nr) {
436 new_entry->index = old_entry->index;
437 if (old_entry != istate->split_index->base->cache[new_entry->index - 1])
438 discard_cache_entry(istate->split_index->base->cache[new_entry->index - 1]);
439 istate->split_index->base->cache[new_entry->index - 1] = new_entry;
443 void add_split_index(struct index_state *istate)
445 if (!istate->split_index) {
446 init_split_index(istate);
447 istate->cache_changed |= SPLIT_INDEX_ORDERED;
451 void remove_split_index(struct index_state *istate)
453 if (istate->split_index) {
454 if (istate->split_index->base) {
456 * When removing the split index, we need to move
457 * ownership of the mem_pool associated with the
458 * base index to the main index. There may be cache entries
459 * allocated from the base's memory pool that are shared with
460 * the_index.cache[].
462 mem_pool_combine(istate->ce_mem_pool,
463 istate->split_index->base->ce_mem_pool);
466 * The split index no longer owns the mem_pool backing
467 * its cache array. As we are discarding this index,
468 * mark the index as having no cache entries, so it
469 * will not attempt to clean up the cache entries or
470 * validate them.
472 istate->split_index->base->cache_nr = 0;
476 * We can discard the split index because its
477 * memory pool has been incorporated into the
478 * memory pool associated with the the_index.
480 discard_split_index(istate);
482 istate->cache_changed |= SOMETHING_CHANGED;