Merge branch 'ab/various-leak-fixes'
[git.git] / split-index.c
blob5d0f04763ea2d201a3bbe417bd1951aea23a8436
1 #include "cache.h"
2 #include "split-index.h"
3 #include "ewah/ewok.h"
5 struct split_index *init_split_index(struct index_state *istate)
7 if (!istate->split_index) {
8 if (istate->sparse_index)
9 die(_("cannot use split index with a sparse index"));
11 CALLOC_ARRAY(istate->split_index, 1);
12 istate->split_index->refcount = 1;
14 return istate->split_index;
17 int read_link_extension(struct index_state *istate,
18 const void *data_, unsigned long sz)
20 const unsigned char *data = data_;
21 struct split_index *si;
22 int ret;
24 if (sz < the_hash_algo->rawsz)
25 return error("corrupt link extension (too short)");
26 si = init_split_index(istate);
27 oidread(&si->base_oid, data);
28 data += the_hash_algo->rawsz;
29 sz -= the_hash_algo->rawsz;
30 if (!sz)
31 return 0;
32 si->delete_bitmap = ewah_new();
33 ret = ewah_read_mmap(si->delete_bitmap, data, sz);
34 if (ret < 0)
35 return error("corrupt delete bitmap in link extension");
36 data += ret;
37 sz -= ret;
38 si->replace_bitmap = ewah_new();
39 ret = ewah_read_mmap(si->replace_bitmap, data, sz);
40 if (ret < 0)
41 return error("corrupt replace bitmap in link extension");
42 if (ret != sz)
43 return error("garbage at the end of link extension");
44 return 0;
47 int write_link_extension(struct strbuf *sb,
48 struct index_state *istate)
50 struct split_index *si = istate->split_index;
51 strbuf_add(sb, si->base_oid.hash, the_hash_algo->rawsz);
52 if (!si->delete_bitmap && !si->replace_bitmap)
53 return 0;
54 ewah_serialize_strbuf(si->delete_bitmap, sb);
55 ewah_serialize_strbuf(si->replace_bitmap, sb);
56 return 0;
59 static void mark_base_index_entries(struct index_state *base)
61 int i;
63 * To keep track of the shared entries between
64 * istate->base->cache[] and istate->cache[], base entry
65 * position is stored in each base entry. All positions start
66 * from 1 instead of 0, which is reserved to say "this is a new
67 * entry".
69 for (i = 0; i < base->cache_nr; i++)
70 base->cache[i]->index = i + 1;
73 void move_cache_to_base_index(struct index_state *istate)
75 struct split_index *si = istate->split_index;
76 int i;
79 * If there was a previous base index, then transfer ownership of allocated
80 * entries to the parent index.
82 if (si->base &&
83 si->base->ce_mem_pool) {
85 if (!istate->ce_mem_pool) {
86 istate->ce_mem_pool = xmalloc(sizeof(struct mem_pool));
87 mem_pool_init(istate->ce_mem_pool, 0);
90 mem_pool_combine(istate->ce_mem_pool, istate->split_index->base->ce_mem_pool);
93 ALLOC_ARRAY(si->base, 1);
94 index_state_init(si->base, istate->repo);
95 si->base->version = istate->version;
96 /* zero timestamp disables racy test in ce_write_index() */
97 si->base->timestamp = istate->timestamp;
98 ALLOC_GROW(si->base->cache, istate->cache_nr, si->base->cache_alloc);
99 si->base->cache_nr = istate->cache_nr;
102 * The mem_pool needs to move with the allocated entries.
104 si->base->ce_mem_pool = istate->ce_mem_pool;
105 istate->ce_mem_pool = NULL;
107 COPY_ARRAY(si->base->cache, istate->cache, istate->cache_nr);
108 mark_base_index_entries(si->base);
109 for (i = 0; i < si->base->cache_nr; i++)
110 si->base->cache[i]->ce_flags &= ~CE_UPDATE_IN_BASE;
113 static void mark_entry_for_delete(size_t pos, void *data)
115 struct index_state *istate = data;
116 if (pos >= istate->cache_nr)
117 die("position for delete %d exceeds base index size %d",
118 (int)pos, istate->cache_nr);
119 istate->cache[pos]->ce_flags |= CE_REMOVE;
120 istate->split_index->nr_deletions++;
123 static void replace_entry(size_t pos, void *data)
125 struct index_state *istate = data;
126 struct split_index *si = istate->split_index;
127 struct cache_entry *dst, *src;
129 if (pos >= istate->cache_nr)
130 die("position for replacement %d exceeds base index size %d",
131 (int)pos, istate->cache_nr);
132 if (si->nr_replacements >= si->saved_cache_nr)
133 die("too many replacements (%d vs %d)",
134 si->nr_replacements, si->saved_cache_nr);
135 dst = istate->cache[pos];
136 if (dst->ce_flags & CE_REMOVE)
137 die("entry %d is marked as both replaced and deleted",
138 (int)pos);
139 src = si->saved_cache[si->nr_replacements];
140 if (ce_namelen(src))
141 die("corrupt link extension, entry %d should have "
142 "zero length name", (int)pos);
143 src->index = pos + 1;
144 src->ce_flags |= CE_UPDATE_IN_BASE;
145 src->ce_namelen = dst->ce_namelen;
146 copy_cache_entry(dst, src);
147 discard_cache_entry(src);
148 si->nr_replacements++;
151 void merge_base_index(struct index_state *istate)
153 struct split_index *si = istate->split_index;
154 unsigned int i;
156 mark_base_index_entries(si->base);
158 si->saved_cache = istate->cache;
159 si->saved_cache_nr = istate->cache_nr;
160 istate->cache_nr = si->base->cache_nr;
161 istate->cache = NULL;
162 istate->cache_alloc = 0;
163 ALLOC_GROW(istate->cache, istate->cache_nr, istate->cache_alloc);
164 COPY_ARRAY(istate->cache, si->base->cache, istate->cache_nr);
166 si->nr_deletions = 0;
167 si->nr_replacements = 0;
168 ewah_each_bit(si->replace_bitmap, replace_entry, istate);
169 ewah_each_bit(si->delete_bitmap, mark_entry_for_delete, istate);
170 if (si->nr_deletions)
171 remove_marked_cache_entries(istate, 0);
173 for (i = si->nr_replacements; i < si->saved_cache_nr; i++) {
174 if (!ce_namelen(si->saved_cache[i]))
175 die("corrupt link extension, entry %d should "
176 "have non-zero length name", i);
177 add_index_entry(istate, si->saved_cache[i],
178 ADD_CACHE_OK_TO_ADD |
179 ADD_CACHE_KEEP_CACHE_TREE |
181 * we may have to replay what
182 * merge-recursive.c:update_stages()
183 * does, which has this flag on
185 ADD_CACHE_SKIP_DFCHECK);
186 si->saved_cache[i] = NULL;
189 ewah_free(si->delete_bitmap);
190 ewah_free(si->replace_bitmap);
191 FREE_AND_NULL(si->saved_cache);
192 si->delete_bitmap = NULL;
193 si->replace_bitmap = NULL;
194 si->saved_cache_nr = 0;
198 * Compare most of the fields in two cache entries, i.e. all except the
199 * hashmap_entry and the name.
201 static int compare_ce_content(struct cache_entry *a, struct cache_entry *b)
203 const unsigned int ondisk_flags = CE_STAGEMASK | CE_VALID |
204 CE_EXTENDED_FLAGS;
205 unsigned int ce_flags = a->ce_flags;
206 unsigned int base_flags = b->ce_flags;
207 int ret;
209 /* only on-disk flags matter */
210 a->ce_flags &= ondisk_flags;
211 b->ce_flags &= ondisk_flags;
212 ret = memcmp(&a->ce_stat_data, &b->ce_stat_data,
213 offsetof(struct cache_entry, name) -
214 offsetof(struct cache_entry, oid)) ||
215 !oideq(&a->oid, &b->oid);
216 a->ce_flags = ce_flags;
217 b->ce_flags = base_flags;
219 return ret;
222 void prepare_to_write_split_index(struct index_state *istate)
224 struct split_index *si = init_split_index(istate);
225 struct cache_entry **entries = NULL, *ce;
226 int i, nr_entries = 0, nr_alloc = 0;
228 si->delete_bitmap = ewah_new();
229 si->replace_bitmap = ewah_new();
231 if (si->base) {
232 /* Go through istate->cache[] and mark CE_MATCHED to
233 * entry with positive index. We'll go through
234 * base->cache[] later to delete all entries in base
235 * that are not marked with either CE_MATCHED or
236 * CE_UPDATE_IN_BASE. If istate->cache[i] is a
237 * duplicate, deduplicate it.
239 for (i = 0; i < istate->cache_nr; i++) {
240 struct cache_entry *base;
241 ce = istate->cache[i];
242 if (!ce->index) {
244 * During simple update index operations this
245 * is a cache entry that is not present in
246 * the shared index. It will be added to the
247 * split index.
249 * However, it might also represent a file
250 * that already has a cache entry in the
251 * shared index, but a new index has just
252 * been constructed by unpack_trees(), and
253 * this entry now refers to different content
254 * than what was recorded in the original
255 * index, e.g. during 'read-tree -m HEAD^' or
256 * 'checkout HEAD^'. In this case the
257 * original entry in the shared index will be
258 * marked as deleted, and this entry will be
259 * added to the split index.
261 continue;
263 if (ce->index > si->base->cache_nr) {
264 BUG("ce refers to a shared ce at %d, which is beyond the shared index size %d",
265 ce->index, si->base->cache_nr);
267 ce->ce_flags |= CE_MATCHED; /* or "shared" */
268 base = si->base->cache[ce->index - 1];
269 if (ce == base) {
270 /* The entry is present in the shared index. */
271 if (ce->ce_flags & CE_UPDATE_IN_BASE) {
273 * Already marked for inclusion in
274 * the split index, either because
275 * the corresponding file was
276 * modified and the cached stat data
277 * was refreshed, or because there
278 * is already a replacement entry in
279 * the split index.
280 * Nothing more to do here.
282 } else if (!ce_uptodate(ce) &&
283 is_racy_timestamp(istate, ce)) {
285 * A racily clean cache entry stored
286 * only in the shared index: it must
287 * be added to the split index, so
288 * the subsequent do_write_index()
289 * can smudge its stat data.
291 ce->ce_flags |= CE_UPDATE_IN_BASE;
292 } else {
294 * The entry is only present in the
295 * shared index and it was not
296 * refreshed.
297 * Just leave it there.
300 continue;
302 if (ce->ce_namelen != base->ce_namelen ||
303 strcmp(ce->name, base->name)) {
304 ce->index = 0;
305 continue;
308 * This is the copy of a cache entry that is present
309 * in the shared index, created by unpack_trees()
310 * while it constructed a new index.
312 if (ce->ce_flags & CE_UPDATE_IN_BASE) {
314 * Already marked for inclusion in the split
315 * index, either because the corresponding
316 * file was modified and the cached stat data
317 * was refreshed, or because the original
318 * entry already had a replacement entry in
319 * the split index.
320 * Nothing to do.
322 } else if (!ce_uptodate(ce) &&
323 is_racy_timestamp(istate, ce)) {
325 * A copy of a racily clean cache entry from
326 * the shared index. It must be added to
327 * the split index, so the subsequent
328 * do_write_index() can smudge its stat data.
330 ce->ce_flags |= CE_UPDATE_IN_BASE;
331 } else {
333 * Thoroughly compare the cached data to see
334 * whether it should be marked for inclusion
335 * in the split index.
337 * This comparison might be unnecessary, as
338 * code paths modifying the cached data do
339 * set CE_UPDATE_IN_BASE as well.
341 if (compare_ce_content(ce, base))
342 ce->ce_flags |= CE_UPDATE_IN_BASE;
344 discard_cache_entry(base);
345 si->base->cache[ce->index - 1] = ce;
347 for (i = 0; i < si->base->cache_nr; i++) {
348 ce = si->base->cache[i];
349 if ((ce->ce_flags & CE_REMOVE) ||
350 !(ce->ce_flags & CE_MATCHED))
351 ewah_set(si->delete_bitmap, i);
352 else if (ce->ce_flags & CE_UPDATE_IN_BASE) {
353 ewah_set(si->replace_bitmap, i);
354 ce->ce_flags |= CE_STRIP_NAME;
355 ALLOC_GROW(entries, nr_entries+1, nr_alloc);
356 entries[nr_entries++] = ce;
358 if (is_null_oid(&ce->oid))
359 istate->drop_cache_tree = 1;
363 for (i = 0; i < istate->cache_nr; i++) {
364 ce = istate->cache[i];
365 if ((!si->base || !ce->index) && !(ce->ce_flags & CE_REMOVE)) {
366 assert(!(ce->ce_flags & CE_STRIP_NAME));
367 ALLOC_GROW(entries, nr_entries+1, nr_alloc);
368 entries[nr_entries++] = ce;
370 ce->ce_flags &= ~CE_MATCHED;
374 * take cache[] out temporarily, put entries[] in its place
375 * for writing
377 si->saved_cache = istate->cache;
378 si->saved_cache_nr = istate->cache_nr;
379 istate->cache = entries;
380 istate->cache_nr = nr_entries;
383 void finish_writing_split_index(struct index_state *istate)
385 struct split_index *si = init_split_index(istate);
387 ewah_free(si->delete_bitmap);
388 ewah_free(si->replace_bitmap);
389 si->delete_bitmap = NULL;
390 si->replace_bitmap = NULL;
391 free(istate->cache);
392 istate->cache = si->saved_cache;
393 istate->cache_nr = si->saved_cache_nr;
396 void discard_split_index(struct index_state *istate)
398 struct split_index *si = istate->split_index;
399 if (!si)
400 return;
401 istate->split_index = NULL;
402 si->refcount--;
403 if (si->refcount)
404 return;
405 if (si->base) {
406 discard_index(si->base);
407 free(si->base);
409 free(si);
412 void save_or_free_index_entry(struct index_state *istate, struct cache_entry *ce)
414 if (ce->index &&
415 istate->split_index &&
416 istate->split_index->base &&
417 ce->index <= istate->split_index->base->cache_nr &&
418 ce == istate->split_index->base->cache[ce->index - 1])
419 ce->ce_flags |= CE_REMOVE;
420 else
421 discard_cache_entry(ce);
424 void replace_index_entry_in_base(struct index_state *istate,
425 struct cache_entry *old_entry,
426 struct cache_entry *new_entry)
428 if (old_entry->index &&
429 istate->split_index &&
430 istate->split_index->base &&
431 old_entry->index <= istate->split_index->base->cache_nr) {
432 new_entry->index = old_entry->index;
433 if (old_entry != istate->split_index->base->cache[new_entry->index - 1])
434 discard_cache_entry(istate->split_index->base->cache[new_entry->index - 1]);
435 istate->split_index->base->cache[new_entry->index - 1] = new_entry;
439 void add_split_index(struct index_state *istate)
441 if (!istate->split_index) {
442 init_split_index(istate);
443 istate->cache_changed |= SPLIT_INDEX_ORDERED;
447 void remove_split_index(struct index_state *istate)
449 if (istate->split_index) {
450 if (istate->split_index->base) {
452 * When removing the split index, we need to move
453 * ownership of the mem_pool associated with the
454 * base index to the main index. There may be cache entries
455 * allocated from the base's memory pool that are shared with
456 * the_index.cache[].
458 mem_pool_combine(istate->ce_mem_pool,
459 istate->split_index->base->ce_mem_pool);
462 * The split index no longer owns the mem_pool backing
463 * its cache array. As we are discarding this index,
464 * mark the index as having no cache entries, so it
465 * will not attempt to clean up the cache entries or
466 * validate them.
468 istate->split_index->base->cache_nr = 0;
472 * We can discard the split index because its
473 * memory pool has been incorporated into the
474 * memory pool associated with the the_index.
476 discard_split_index(istate);
478 istate->cache_changed |= SOMETHING_CHANGED;