hammer2 - error handling 2/N (chain_lookup/chain_next)
[dragonfly.git] / sys / vfs / hammer2 / hammer2_bulkfree.c
blob38514a658072e133f8dcaf4ced454fa53fc454a8
1 /*
2 * Copyright (c) 2013-2015 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@dragonflybsd.org>
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/kernel.h>
37 #include <sys/fcntl.h>
38 #include <sys/buf.h>
39 #include <sys/proc.h>
40 #include <sys/namei.h>
41 #include <sys/mount.h>
42 #include <sys/vnode.h>
43 #include <sys/mountctl.h>
44 #include <vm/vm_kern.h>
45 #include <vm/vm_extern.h>
47 #include "hammer2.h"
49 #define H2FMBASE(key, radix) ((key) & ~(((hammer2_off_t)1 << (radix)) - 1))
50 #define H2FMSHIFT(radix) ((hammer2_off_t)1 << (radix))
53 * breadth-first search
55 typedef struct hammer2_chain_save {
56 TAILQ_ENTRY(hammer2_chain_save) entry;
57 hammer2_chain_t *chain;
58 int pri;
59 } hammer2_chain_save_t;
61 TAILQ_HEAD(hammer2_chain_save_list, hammer2_chain_save);
62 typedef struct hammer2_chain_save_list hammer2_chain_save_list_t;
64 typedef struct hammer2_bulkfree_info {
65 hammer2_dev_t *hmp;
66 kmem_anon_desc_t kp;
67 hammer2_off_t sbase; /* sub-loop iteration */
68 hammer2_off_t sstop;
69 hammer2_bmap_data_t *bmap;
70 int depth;
71 long count_10_00; /* staged->free */
72 long count_11_10; /* allocated->staged */
73 long count_00_11; /* (should not happen) */
74 long count_01_11; /* (should not happen) */
75 long count_10_11; /* staged->allocated */
76 long count_l0cleans;
77 long count_linadjusts;
78 long count_inodes_scanned;
79 long count_dedup_factor;
80 long bytes_scanned;
81 hammer2_off_t adj_free;
82 hammer2_tid_t mtid;
83 hammer2_tid_t saved_mirror_tid;
84 time_t save_time;
85 hammer2_chain_save_list_t list;
86 hammer2_dedup_t *dedup;
87 int pri;
88 } hammer2_bulkfree_info_t;
90 static int h2_bulkfree_test(hammer2_bulkfree_info_t *info,
91 hammer2_blockref_t *bref, int pri);
94 * General bulk scan function with callback. Called with a referenced
95 * but UNLOCKED parent. The parent is returned in the same state.
97 static
98 int
99 hammer2_bulk_scan(hammer2_chain_t *parent,
100 int (*func)(hammer2_bulkfree_info_t *info,
101 hammer2_blockref_t *bref),
102 hammer2_bulkfree_info_t *info)
104 hammer2_blockref_t bref;
105 hammer2_chain_t *chain;
106 int first = 1;
107 int rup_error;
108 int error;
110 ++info->pri;
112 hammer2_chain_lock(parent, HAMMER2_RESOLVE_ALWAYS |
113 HAMMER2_RESOLVE_SHARED);
114 chain = NULL;
115 rup_error = 0;
116 error = 0;
119 * Generally loop on the contents if we have not been flagged
120 * for abort.
122 * Remember that these chains are completely isolated from
123 * the frontend, so we can release locks temporarily without
124 * imploding.
126 for (;;) {
127 error |= hammer2_chain_scan(parent, &chain, &bref, &first,
128 HAMMER2_LOOKUP_NODATA |
129 HAMMER2_LOOKUP_SHARED);
132 * Handle EOF or other error at current level. This stops
133 * the bulkfree scan.
135 if (error)
136 break;
139 * Process bref, chain is only non-NULL if the bref
140 * might be recursable (its possible that we sometimes get
141 * a non-NULL chain where the bref cannot be recursed).
143 ++info->pri;
144 if (h2_bulkfree_test(info, &bref, 1))
145 continue;
147 error |= func(info, &bref);
148 if (error)
149 break;
152 * A non-null chain is always returned if it is
153 * recursive, otherwise a non-null chain might be
154 * returned but usually is not when not recursive.
156 if (chain == NULL)
157 continue;
160 * Else check type and setup depth-first scan.
162 * Account for bytes actually read.
164 info->bytes_scanned += chain->bytes;
166 switch(chain->bref.type) {
167 case HAMMER2_BREF_TYPE_INODE:
168 case HAMMER2_BREF_TYPE_FREEMAP_NODE:
169 case HAMMER2_BREF_TYPE_INDIRECT:
170 case HAMMER2_BREF_TYPE_VOLUME:
171 case HAMMER2_BREF_TYPE_FREEMAP:
172 ++info->depth;
173 if (info->depth > 16) {
174 hammer2_chain_save_t *save;
175 save = kmalloc(sizeof(*save), M_HAMMER2,
176 M_WAITOK | M_ZERO);
177 save->chain = chain;
178 hammer2_chain_ref(chain);
179 TAILQ_INSERT_TAIL(&info->list, save, entry);
181 /* guess */
182 info->pri += 10;
183 } else {
184 int savepri = info->pri;
186 hammer2_chain_unlock(chain);
187 info->pri = 0;
188 rup_error |=
189 hammer2_bulk_scan(chain, func, info);
190 info->pri += savepri;
191 hammer2_chain_lock(chain,
192 HAMMER2_RESOLVE_ALWAYS |
193 HAMMER2_RESOLVE_SHARED);
195 --info->depth;
196 break;
197 default:
198 /* does not recurse */
199 break;
201 if (rup_error & HAMMER2_ERROR_ABORTED)
202 break;
204 if (chain) {
205 hammer2_chain_unlock(chain);
206 hammer2_chain_drop(chain);
210 * Save with higher pri now that we know what it is.
212 h2_bulkfree_test(info, &parent->bref, info->pri + 1);
214 hammer2_chain_unlock(parent);
216 return ((error | rup_error) & ~HAMMER2_ERROR_EOF);
220 * Bulkfree algorithm
222 * Repeat {
223 * Chain flush (partial synchronization) XXX removed
224 * Scan the whole topology - build in-memory freemap (mark 11)
225 * Reconcile the in-memory freemap against the on-disk freemap.
226 * ondisk xx -> ondisk 11 (if allocated)
227 * ondisk 11 -> ondisk 10 (if free in-memory)
228 * ondisk 10 -> ondisk 00 (if free in-memory) - on next pass
231 * The topology scan may have to be performed multiple times to window
232 * freemaps which are too large to fit in kernel memory.
234 * Races are handled using a double-transition (11->10, 10->00). The bulkfree
235 * scan snapshots the volume root's blockset and thus can run concurrent with
236 * normal operations, as long as a full flush is made between each pass to
237 * synchronize any modified chains (otherwise their blocks might be improperly
238 * freed).
240 * Temporary memory in multiples of 64KB is required to reconstruct the leaf
241 * hammer2_bmap_data blocks so they can later be compared against the live
242 * freemap. Each 64KB block represents 128 x 16KB x 1024 = ~2 GB of storage.
243 * A 32MB save area thus represents around ~1 TB. The temporary memory
244 * allocated can be specified. If it is not sufficient multiple topology
245 * passes will be made.
249 * Bulkfree callback info
251 static void hammer2_bulkfree_thread(void *arg __unused);
252 static void cbinfo_bmap_init(hammer2_bulkfree_info_t *cbinfo, size_t size);
253 static int h2_bulkfree_callback(hammer2_bulkfree_info_t *cbinfo,
254 hammer2_blockref_t *bref);
255 static int h2_bulkfree_sync(hammer2_bulkfree_info_t *cbinfo);
256 static void h2_bulkfree_sync_adjust(hammer2_bulkfree_info_t *cbinfo,
257 hammer2_off_t data_off, hammer2_bmap_data_t *live,
258 hammer2_bmap_data_t *bmap, hammer2_key_t alloc_base);
260 void
261 hammer2_bulkfree_init(hammer2_dev_t *hmp)
263 hammer2_thr_create(&hmp->bfthr, NULL, hmp,
264 hmp->devrepname, -1, -1,
265 hammer2_bulkfree_thread);
268 void
269 hammer2_bulkfree_uninit(hammer2_dev_t *hmp)
271 hammer2_thr_delete(&hmp->bfthr);
274 static void
275 hammer2_bulkfree_thread(void *arg)
277 hammer2_thread_t *thr = arg;
278 hammer2_ioc_bulkfree_t bfi;
279 uint32_t flags;
281 for (;;) {
282 hammer2_thr_wait_any(thr,
283 HAMMER2_THREAD_STOP |
284 HAMMER2_THREAD_FREEZE |
285 HAMMER2_THREAD_UNFREEZE |
286 HAMMER2_THREAD_REMASTER,
287 hz * 60);
289 flags = thr->flags;
290 cpu_ccfence();
291 if (flags & HAMMER2_THREAD_STOP)
292 break;
293 if (flags & HAMMER2_THREAD_FREEZE) {
294 hammer2_thr_signal2(thr, HAMMER2_THREAD_FROZEN,
295 HAMMER2_THREAD_FREEZE);
296 continue;
298 if (flags & HAMMER2_THREAD_UNFREEZE) {
299 hammer2_thr_signal2(thr, 0,
300 HAMMER2_THREAD_FROZEN |
301 HAMMER2_THREAD_UNFREEZE);
302 continue;
304 if (flags & HAMMER2_THREAD_FROZEN)
305 continue;
306 if (flags & HAMMER2_THREAD_REMASTER) {
307 hammer2_thr_signal2(thr, 0, HAMMER2_THREAD_REMASTER);
308 bzero(&bfi, sizeof(bfi));
309 bfi.size = 8192 * 1024;
310 /* hammer2_bulkfree_pass(thr->hmp, &bfi); */
313 thr->td = NULL;
314 hammer2_thr_signal(thr, HAMMER2_THREAD_STOPPED);
315 /* structure can go invalid at this point */
319 hammer2_bulkfree_pass(hammer2_dev_t *hmp, hammer2_chain_t *vchain,
320 hammer2_ioc_bulkfree_t *bfi)
322 hammer2_bulkfree_info_t cbinfo;
323 hammer2_chain_save_t *save;
324 hammer2_off_t incr;
325 size_t size;
326 int error;
329 * We have to clear the live dedup cache as it might have entries
330 * that are freeable as of now. Any new entries in the dedup cache
331 * made after this point, even if they become freeable, will have
332 * previously been fully allocated and will be protected by the
333 * 2-stage bulkfree.
335 hammer2_dedup_clear(hmp);
338 * Setup for free pass
340 bzero(&cbinfo, sizeof(cbinfo));
341 size = (bfi->size + HAMMER2_FREEMAP_LEVELN_PSIZE - 1) &
342 ~(size_t)(HAMMER2_FREEMAP_LEVELN_PSIZE - 1);
343 cbinfo.hmp = hmp;
344 cbinfo.bmap = kmem_alloc_swapbacked(&cbinfo.kp, size, VM_SUBSYS_HAMMER);
345 cbinfo.saved_mirror_tid = hmp->voldata.mirror_tid;
347 cbinfo.dedup = kmalloc(sizeof(*cbinfo.dedup) * HAMMER2_DEDUP_HEUR_SIZE,
348 M_HAMMER2, M_WAITOK | M_ZERO);
351 * Normalize start point to a 2GB boundary. We operate on a
352 * 64KB leaf bitmap boundary which represents 2GB of storage.
354 cbinfo.sbase = bfi->sbase;
355 if (cbinfo.sbase > hmp->voldata.volu_size)
356 cbinfo.sbase = hmp->voldata.volu_size;
357 cbinfo.sbase &= ~HAMMER2_FREEMAP_LEVEL1_MASK;
358 TAILQ_INIT(&cbinfo.list);
361 * Loop on a full meta-data scan as many times as required to
362 * get through all available storage.
364 error = 0;
365 while (cbinfo.sbase < hmp->voldata.volu_size) {
367 * We have enough ram to represent (incr) bytes of storage.
368 * Each 64KB of ram represents 2GB of storage.
370 * We must also clean out our de-duplication heuristic for
371 * each (incr) bytes of storage, otherwise we wind up not
372 * scanning meta-data for later areas of storage because
373 * they had already been scanned in earlier areas of storage.
374 * Since the ranging is different, we have to restart
375 * the dedup heuristic too.
377 cbinfo_bmap_init(&cbinfo, size);
378 bzero(cbinfo.dedup, sizeof(*cbinfo.dedup) *
379 HAMMER2_DEDUP_HEUR_SIZE);
380 incr = size / HAMMER2_FREEMAP_LEVELN_PSIZE *
381 HAMMER2_FREEMAP_LEVEL1_SIZE;
382 if (hmp->voldata.volu_size - cbinfo.sbase < incr)
383 cbinfo.sstop = hmp->voldata.volu_size;
384 else
385 cbinfo.sstop = cbinfo.sbase + incr;
386 if (hammer2_debug & 1) {
387 kprintf("bulkfree pass %016jx/%jdGB\n",
388 (intmax_t)cbinfo.sbase,
389 (intmax_t)incr / HAMMER2_FREEMAP_LEVEL1_SIZE);
393 * Scan topology for stuff inside this range.
395 hammer2_trans_init(hmp->spmp, 0);
396 cbinfo.mtid = hammer2_trans_sub(hmp->spmp);
397 cbinfo.pri = 0;
398 error |= hammer2_bulk_scan(vchain, h2_bulkfree_callback,
399 &cbinfo);
401 while ((save = TAILQ_FIRST(&cbinfo.list)) != NULL &&
402 error == 0) {
403 TAILQ_REMOVE(&cbinfo.list, save, entry);
404 cbinfo.pri = 0;
405 error |= hammer2_bulk_scan(save->chain,
406 h2_bulkfree_callback,
407 &cbinfo);
408 hammer2_chain_drop(save->chain);
409 kfree(save, M_HAMMER2);
411 while (save) {
412 TAILQ_REMOVE(&cbinfo.list, save, entry);
413 hammer2_chain_drop(save->chain);
414 kfree(save, M_HAMMER2);
415 save = TAILQ_FIRST(&cbinfo.list);
418 kprintf("bulkfree lastdrop %d %d error=0x%04x\n",
419 vchain->refs, vchain->core.chain_count, error);
422 * If complete scan succeeded we can synchronize our
423 * in-memory freemap against live storage. If an abort
424 * did occur we cannot safely synchronize our partially
425 * filled-out in-memory freemap.
427 if (error == 0) {
428 error = h2_bulkfree_sync(&cbinfo);
430 hammer2_voldata_lock(hmp);
431 hammer2_voldata_modify(hmp);
432 hmp->voldata.allocator_free += cbinfo.adj_free;
433 hammer2_voldata_unlock(hmp);
437 * Cleanup for next loop.
439 hammer2_trans_done(hmp->spmp);
440 if (error)
441 break;
442 cbinfo.sbase = cbinfo.sstop;
443 cbinfo.adj_free = 0;
445 kmem_free_swapbacked(&cbinfo.kp);
446 kfree(cbinfo.dedup, M_HAMMER2);
447 cbinfo.dedup = NULL;
449 bfi->sstop = cbinfo.sbase;
451 incr = bfi->sstop / (hmp->voldata.volu_size / 10000);
452 if (incr > 10000)
453 incr = 10000;
455 kprintf("bulkfree pass statistics (%d.%02d%% storage processed):\n",
456 (int)incr / 100,
457 (int)incr % 100);
459 if (error) {
460 kprintf(" bulkfree was aborted\n");
461 } else {
462 kprintf(" transition->free %ld\n", cbinfo.count_10_00);
463 kprintf(" transition->staged %ld\n", cbinfo.count_11_10);
464 kprintf(" ERR(00)->allocated %ld\n", cbinfo.count_00_11);
465 kprintf(" ERR(01)->allocated %ld\n", cbinfo.count_01_11);
466 kprintf(" staged->allocated %ld\n", cbinfo.count_10_11);
467 kprintf(" ~2MB segs cleaned %ld\n", cbinfo.count_l0cleans);
468 kprintf(" linear adjusts %ld\n",
469 cbinfo.count_linadjusts);
470 kprintf(" dedup factor %ld\n",
471 cbinfo.count_dedup_factor);
474 return error;
477 static void
478 cbinfo_bmap_init(hammer2_bulkfree_info_t *cbinfo, size_t size)
480 hammer2_bmap_data_t *bmap = cbinfo->bmap;
481 hammer2_key_t key = cbinfo->sbase;
482 hammer2_key_t lokey;
483 hammer2_key_t hikey;
485 lokey = (cbinfo->hmp->voldata.allocator_beg + HAMMER2_SEGMASK64) &
486 ~HAMMER2_SEGMASK64;
487 hikey = cbinfo->hmp->voldata.volu_size & ~HAMMER2_SEGMASK64;
489 bzero(bmap, size);
490 while (size) {
491 if (lokey < H2FMBASE(key, HAMMER2_FREEMAP_LEVEL1_RADIX) +
492 HAMMER2_ZONE_SEG64) {
493 lokey = H2FMBASE(key, HAMMER2_FREEMAP_LEVEL1_RADIX) +
494 HAMMER2_ZONE_SEG64;
496 if (key < lokey || key >= hikey) {
497 memset(bmap->bitmapq, -1,
498 sizeof(bmap->bitmapq));
499 bmap->avail = 0;
500 bmap->linear = HAMMER2_SEGSIZE;
501 } else {
502 bmap->avail = H2FMSHIFT(HAMMER2_FREEMAP_LEVEL0_RADIX);
504 size -= sizeof(*bmap);
505 key += HAMMER2_FREEMAP_LEVEL0_SIZE;
506 ++bmap;
510 static int
511 h2_bulkfree_callback(hammer2_bulkfree_info_t *cbinfo, hammer2_blockref_t *bref)
513 hammer2_bmap_data_t *bmap;
514 hammer2_off_t data_off;
515 uint16_t class;
516 size_t bytes;
517 int radix;
520 * Check for signal and allow yield to userland during scan
522 if (hammer2_signal_check(&cbinfo->save_time))
523 return HAMMER2_ERROR_ABORTED;
525 if (bref->type == HAMMER2_BREF_TYPE_INODE) {
526 ++cbinfo->count_inodes_scanned;
527 if ((cbinfo->count_inodes_scanned & 65535) == 0)
528 kprintf(" inodes %6ld bytes %9ld\n",
529 cbinfo->count_inodes_scanned,
530 cbinfo->bytes_scanned);
534 * Calculate the data offset and determine if it is within
535 * the current freemap range being gathered.
537 data_off = bref->data_off & ~HAMMER2_OFF_MASK_RADIX;
538 if (data_off < cbinfo->sbase || data_off >= cbinfo->sstop)
539 return 0;
540 if (data_off < cbinfo->hmp->voldata.allocator_beg)
541 return 0;
542 if (data_off >= cbinfo->hmp->voldata.volu_size)
543 return 0;
546 * Calculate the information needed to generate the in-memory
547 * freemap record.
549 * Hammer2 does not allow allocations to cross the L1 (2GB) boundary,
550 * it's a problem if it does. (Or L0 (2MB) for that matter).
552 radix = (int)(bref->data_off & HAMMER2_OFF_MASK_RADIX);
553 KKASSERT(radix != 0);
554 bytes = (size_t)1 << radix;
555 class = (bref->type << 8) | hammer2_devblkradix(radix);
557 if (data_off + bytes >= cbinfo->sstop) {
558 kprintf("hammer2_bulkfree_scan: illegal 2GB boundary "
559 "%016jx %016jx/%d\n",
560 (intmax_t)bref->data_off,
561 (intmax_t)bref->key,
562 bref->keybits);
563 bytes = cbinfo->sstop - data_off; /* XXX */
567 * Convert to a storage offset relative to the beginning of the
568 * storage range we are collecting. Then lookup the level0 bmap entry.
570 data_off -= cbinfo->sbase;
571 bmap = cbinfo->bmap + (data_off >> HAMMER2_FREEMAP_LEVEL0_RADIX);
574 * Convert data_off to a bmap-relative value (~4MB storage range).
575 * Adjust linear, class, and avail.
577 * Hammer2 does not allow allocations to cross the L0 (4MB) boundary,
579 data_off &= HAMMER2_FREEMAP_LEVEL0_MASK;
580 if (data_off + bytes > HAMMER2_FREEMAP_LEVEL0_SIZE) {
581 kprintf("hammer2_bulkfree_scan: illegal 4MB boundary "
582 "%016jx %016jx/%d\n",
583 (intmax_t)bref->data_off,
584 (intmax_t)bref->key,
585 bref->keybits);
586 bytes = HAMMER2_FREEMAP_LEVEL0_SIZE - data_off;
589 if (bmap->class == 0) {
590 bmap->class = class;
591 bmap->avail = HAMMER2_FREEMAP_LEVEL0_SIZE;
593 if (bmap->class != class) {
594 kprintf("hammer2_bulkfree_scan: illegal mixed class "
595 "%016jx %016jx/%d (%04x vs %04x)\n",
596 (intmax_t)bref->data_off,
597 (intmax_t)bref->key,
598 bref->keybits,
599 class, bmap->class);
603 * Just record the highest byte-granular offset for now. Do not
604 * match against allocations which are in multiples of whole blocks.
606 * Make sure that any in-block linear offset at least covers the
607 * data range. This can cause bmap->linear to become block-aligned.
609 if (bytes & HAMMER2_FREEMAP_BLOCK_MASK) {
610 if (bmap->linear < (int32_t)data_off + (int32_t)bytes)
611 bmap->linear = (int32_t)data_off + (int32_t)bytes;
612 } else if (bmap->linear >= (int32_t)data_off &&
613 bmap->linear < (int32_t)data_off + (int32_t)bytes) {
614 bmap->linear = (int32_t)data_off + (int32_t)bytes;
618 * Adjust the hammer2_bitmap_t bitmap[HAMMER2_BMAP_ELEMENTS].
619 * 64-bit entries, 2 bits per entry, to code 11.
621 * NOTE: data_off mask to 524288, shift right by 14 (radix for 16384),
622 * and multiply shift amount by 2 for sets of 2 bits.
624 * NOTE: The allocation can be smaller than HAMMER2_FREEMAP_BLOCK_SIZE.
625 * also, data_off may not be FREEMAP_BLOCK_SIZE aligned.
627 while (bytes > 0) {
628 hammer2_bitmap_t bmask;
629 int bindex;
631 bindex = (int)data_off >> (HAMMER2_FREEMAP_BLOCK_RADIX +
632 HAMMER2_BMAP_INDEX_RADIX);
633 bmask = (hammer2_bitmap_t)3 <<
634 ((((int)data_off & HAMMER2_BMAP_INDEX_MASK) >>
635 HAMMER2_FREEMAP_BLOCK_RADIX) << 1);
638 * NOTE! The (avail) calculation is bitmap-granular. Multiple
639 * sub-granular records can wind up at the same bitmap
640 * position.
642 if ((bmap->bitmapq[bindex] & bmask) == 0) {
643 if (bytes < HAMMER2_FREEMAP_BLOCK_SIZE) {
644 bmap->avail -= HAMMER2_FREEMAP_BLOCK_SIZE;
645 } else {
646 bmap->avail -= bytes;
648 bmap->bitmapq[bindex] |= bmask;
650 data_off += HAMMER2_FREEMAP_BLOCK_SIZE;
651 if (bytes < HAMMER2_FREEMAP_BLOCK_SIZE)
652 bytes = 0;
653 else
654 bytes -= HAMMER2_FREEMAP_BLOCK_SIZE;
656 return 0;
660 * Synchronize the in-memory bitmap with the live freemap. This is not a
661 * direct copy. Instead the bitmaps must be compared:
663 * In-memory Live-freemap
664 * 00 11 -> 10 (do nothing if live modified)
665 * 10 -> 00 (do nothing if live modified)
666 * 11 10 -> 11 handles race against live
667 * ** -> 11 nominally warn of corruption
670 static int
671 h2_bulkfree_sync(hammer2_bulkfree_info_t *cbinfo)
673 hammer2_off_t data_off;
674 hammer2_key_t key;
675 hammer2_key_t key_dummy;
676 hammer2_bmap_data_t *bmap;
677 hammer2_bmap_data_t *live;
678 hammer2_chain_t *live_parent;
679 hammer2_chain_t *live_chain;
680 int bmapindex;
681 int error;
683 kprintf("hammer2_bulkfree - range ");
685 if (cbinfo->sbase < cbinfo->hmp->voldata.allocator_beg)
686 kprintf("%016jx-",
687 (intmax_t)cbinfo->hmp->voldata.allocator_beg);
688 else
689 kprintf("%016jx-",
690 (intmax_t)cbinfo->sbase);
692 if (cbinfo->sstop > cbinfo->hmp->voldata.volu_size)
693 kprintf("%016jx\n",
694 (intmax_t)cbinfo->hmp->voldata.volu_size);
695 else
696 kprintf("%016jx\n",
697 (intmax_t)cbinfo->sstop);
699 data_off = cbinfo->sbase;
700 bmap = cbinfo->bmap;
702 live_parent = &cbinfo->hmp->fchain;
703 hammer2_chain_ref(live_parent);
704 hammer2_chain_lock(live_parent, HAMMER2_RESOLVE_ALWAYS);
705 live_chain = NULL;
706 error = 0;
709 * Iterate each hammer2_bmap_data_t line (128 bytes) managing
710 * 4MB of storage.
712 while (data_off < cbinfo->sstop) {
714 * The freemap is not used below allocator_beg or beyond
715 * volu_size.
718 if (data_off < cbinfo->hmp->voldata.allocator_beg)
719 goto next;
720 if (data_off >= cbinfo->hmp->voldata.volu_size)
721 goto next;
724 * Locate the freemap leaf on the live filesystem
726 key = (data_off & ~HAMMER2_FREEMAP_LEVEL1_MASK);
728 if (live_chain == NULL || live_chain->bref.key != key) {
729 if (live_chain) {
730 hammer2_chain_unlock(live_chain);
731 hammer2_chain_drop(live_chain);
733 live_chain = hammer2_chain_lookup(
734 &live_parent,
735 &key_dummy,
736 key,
737 key + HAMMER2_FREEMAP_LEVEL1_MASK,
738 &error,
739 HAMMER2_LOOKUP_ALWAYS);
740 if (error) {
741 kprintf("hammer2_bulkfree: freemap lookup "
742 "error near %016jx, error %s\n",
743 (intmax_t)data_off,
744 hammer2_error_str(live_chain->error));
745 break;
748 if (live_chain == NULL) {
750 * XXX if we implement a full recovery mode we need
751 * to create/recreate missing freemap chains if our
752 * bmap has any allocated blocks.
754 if (bmap->class &&
755 bmap->avail != HAMMER2_FREEMAP_LEVEL0_SIZE) {
756 kprintf("hammer2_bulkfree: cannot locate "
757 "live leaf for allocated data "
758 "near %016jx\n",
759 (intmax_t)data_off);
761 goto next;
763 if (live_chain->error) {
764 kprintf("hammer2_bulkfree: unable to access freemap "
765 "near %016jx, error %s\n",
766 (intmax_t)data_off,
767 hammer2_error_str(live_chain->error));
768 hammer2_chain_unlock(live_chain);
769 hammer2_chain_drop(live_chain);
770 live_chain = NULL;
771 goto next;
774 bmapindex = (data_off & HAMMER2_FREEMAP_LEVEL1_MASK) >>
775 HAMMER2_FREEMAP_LEVEL0_RADIX;
776 live = &live_chain->data->bmdata[bmapindex];
779 * Shortcut if the bitmaps match and the live linear
780 * indicator is sane. We can't do a perfect check of
781 * live->linear because the only real requirement is that
782 * if it is not block-aligned, that it not cover the space
783 * within its current block which overlaps one of the data
784 * ranges we scan. We don't retain enough fine-grained
785 * data in our scan to be able to set it exactly.
787 * TODO - we could shortcut this by testing that both
788 * live->class and bmap->class are 0, and both avails are
789 * set to HAMMER2_FREEMAP_LEVEL0_SIZE (4MB).
791 if (bcmp(live->bitmapq, bmap->bitmapq,
792 sizeof(bmap->bitmapq)) == 0 &&
793 live->linear >= bmap->linear) {
794 goto next;
796 if (hammer2_debug & 1) {
797 kprintf("live %016jx %04d.%04x (avail=%d)\n",
798 data_off, bmapindex, live->class, live->avail);
801 hammer2_chain_modify(live_chain, cbinfo->mtid, 0, 0);
802 live = &live_chain->data->bmdata[bmapindex];
804 h2_bulkfree_sync_adjust(cbinfo, data_off, live, bmap,
805 live_chain->bref.key +
806 bmapindex *
807 HAMMER2_FREEMAP_LEVEL0_SIZE);
808 next:
809 data_off += HAMMER2_FREEMAP_LEVEL0_SIZE;
810 ++bmap;
812 if (live_chain) {
813 hammer2_chain_unlock(live_chain);
814 hammer2_chain_drop(live_chain);
816 if (live_parent) {
817 hammer2_chain_unlock(live_parent);
818 hammer2_chain_drop(live_parent);
820 return error;
824 * Merge the bulkfree bitmap against the existing bitmap.
826 static
827 void
828 h2_bulkfree_sync_adjust(hammer2_bulkfree_info_t *cbinfo,
829 hammer2_off_t data_off, hammer2_bmap_data_t *live,
830 hammer2_bmap_data_t *bmap, hammer2_key_t alloc_base)
832 int bindex;
833 int scount;
834 hammer2_off_t tmp_off;
835 hammer2_bitmap_t lmask;
836 hammer2_bitmap_t mmask;
838 tmp_off = data_off;
840 for (bindex = 0; bindex < HAMMER2_BMAP_ELEMENTS; ++bindex) {
841 lmask = live->bitmapq[bindex]; /* live */
842 mmask = bmap->bitmapq[bindex]; /* snapshotted bulkfree */
843 if (lmask == mmask) {
844 tmp_off += HAMMER2_BMAP_INDEX_SIZE;
845 continue;
848 for (scount = 0;
849 scount < HAMMER2_BMAP_BITS_PER_ELEMENT;
850 scount += 2) {
851 if ((mmask & 3) == 0) {
853 * in-memory 00 live 11 -> 10
854 * live 10 -> 00
856 * Storage might be marked allocated or
857 * staged and must be remarked staged or
858 * free.
860 switch (lmask & 3) {
861 case 0: /* 00 */
862 break;
863 case 1: /* 01 */
864 kprintf("hammer2_bulkfree: cannot "
865 "transition m=00/l=01\n");
866 break;
867 case 2: /* 10 -> 00 */
868 live->bitmapq[bindex] &=
869 ~((hammer2_bitmap_t)2 << scount);
870 live->avail +=
871 HAMMER2_FREEMAP_BLOCK_SIZE;
872 if (live->avail >
873 HAMMER2_FREEMAP_LEVEL0_SIZE) {
874 live->avail =
875 HAMMER2_FREEMAP_LEVEL0_SIZE;
877 cbinfo->adj_free +=
878 HAMMER2_FREEMAP_BLOCK_SIZE;
879 ++cbinfo->count_10_00;
880 hammer2_io_dedup_assert(
881 cbinfo->hmp,
882 tmp_off |
883 HAMMER2_FREEMAP_BLOCK_RADIX,
884 HAMMER2_FREEMAP_BLOCK_SIZE);
885 break;
886 case 3: /* 11 -> 10 */
887 live->bitmapq[bindex] &=
888 ~((hammer2_bitmap_t)1 << scount);
889 ++cbinfo->count_11_10;
890 hammer2_io_dedup_delete(
891 cbinfo->hmp,
892 HAMMER2_BREF_TYPE_DATA,
893 tmp_off |
894 HAMMER2_FREEMAP_BLOCK_RADIX,
895 HAMMER2_FREEMAP_BLOCK_SIZE);
896 break;
898 } else if ((mmask & 3) == 3) {
900 * in-memory 11 live 10 -> 11
901 * live ** -> 11
903 * Storage might be incorrectly marked free
904 * or staged and must be remarked fully
905 * allocated.
907 switch (lmask & 3) {
908 case 0: /* 00 */
909 ++cbinfo->count_00_11;
910 cbinfo->adj_free -=
911 HAMMER2_FREEMAP_BLOCK_SIZE;
912 live->avail -=
913 HAMMER2_FREEMAP_BLOCK_SIZE;
914 if ((int32_t)live->avail < 0)
915 live->avail = 0;
916 break;
917 case 1: /* 01 */
918 ++cbinfo->count_01_11;
919 break;
920 case 2: /* 10 -> 11 */
921 ++cbinfo->count_10_11;
922 break;
923 case 3: /* 11 */
924 break;
926 live->bitmapq[bindex] |=
927 ((hammer2_bitmap_t)3 << scount);
929 mmask >>= 2;
930 lmask >>= 2;
931 tmp_off += HAMMER2_FREEMAP_BLOCK_SIZE;
936 * Determine if the live bitmap is completely free and reset its
937 * fields if so. Otherwise check to see if we can reduce the linear
938 * offset.
940 for (bindex = HAMMER2_BMAP_ELEMENTS - 1; bindex >= 0; --bindex) {
941 if (live->bitmapq[bindex] != 0)
942 break;
944 if (bindex < 0) {
946 * Completely empty, reset entire segment
948 #if 0
949 kprintf("hammer2: cleanseg %016jx.%04x (%d)\n",
950 alloc_base, live->class, live->avail);
951 #endif
952 live->avail = HAMMER2_FREEMAP_LEVEL0_SIZE;
953 live->class = 0;
954 live->linear = 0;
955 ++cbinfo->count_l0cleans;
956 } else if (bindex < 7) {
958 * Partially full, bitmapq[bindex] != 0. The live->linear
959 * offset can legitimately be just about anything, but
960 * our bulkfree pass doesn't record enough information to
961 * set it exactly. Just make sure that it is set to a
962 * safe value that also works in our match code above (the
963 * bcmp and linear test).
965 * We cannot safely leave live->linear at a sub-block offset
966 * unless it is already in the same block as bmap->linear.
968 * If it is not in the same block, we cannot assume that
969 * we can set it to bmap->linear on a sub-block boundary,
970 * because the live system could have bounced it around.
971 * In that situation we satisfy our bcmp/skip requirement
972 * above by setting it to the nearest higher block boundary.
973 * This alignment effectively kills any partial allocation it
974 * might have been tracking before.
976 if (live->linear < bmap->linear &&
977 ((live->linear ^ bmap->linear) &
978 ~HAMMER2_FREEMAP_BLOCK_MASK) == 0) {
979 live->linear = bmap->linear;
980 ++cbinfo->count_linadjusts;
981 } else {
982 live->linear =
983 (bmap->linear + HAMMER2_FREEMAP_BLOCK_MASK) &
984 ~HAMMER2_FREEMAP_BLOCK_MASK;
985 ++cbinfo->count_linadjusts;
987 } else {
989 * Completely full, effectively disable the linear iterator
991 live->linear = HAMMER2_SEGSIZE;
994 #if 0
995 if (bmap->class) {
996 kprintf("%016jx %04d.%04x (avail=%7d) "
997 "%08x %08x %08x %08x %08x %08x %08x %08x\n",
998 (intmax_t)data_off,
999 (int)((data_off &
1000 HAMMER2_FREEMAP_LEVEL1_MASK) >>
1001 HAMMER2_FREEMAP_LEVEL0_RADIX),
1002 bmap->class,
1003 bmap->avail,
1004 bmap->bitmap[0], bmap->bitmap[1],
1005 bmap->bitmap[2], bmap->bitmap[3],
1006 bmap->bitmap[4], bmap->bitmap[5],
1007 bmap->bitmap[6], bmap->bitmap[7]);
1009 #endif
1013 * BULKFREE DEDUP HEURISTIC
1015 * WARNING! This code is SMP safe but the heuristic allows SMP collisions.
1016 * All fields must be loaded into locals and validated.
1018 static
1020 h2_bulkfree_test(hammer2_bulkfree_info_t *cbinfo, hammer2_blockref_t *bref,
1021 int pri)
1023 hammer2_dedup_t *dedup;
1024 int best;
1025 int n;
1026 int i;
1028 n = hammer2_icrc32(&bref->data_off, sizeof(bref->data_off));
1029 dedup = cbinfo->dedup + (n & (HAMMER2_DEDUP_HEUR_MASK & ~7));
1031 for (i = best = 0; i < 8; ++i) {
1032 if (dedup[i].data_off == bref->data_off) {
1033 if (dedup[i].ticks < pri)
1034 dedup[i].ticks = pri;
1035 if (pri == 1)
1036 cbinfo->count_dedup_factor += dedup[i].ticks;
1037 return 1;
1039 if (dedup[i].ticks < dedup[best].ticks)
1040 best = i;
1042 dedup[best].data_off = bref->data_off;
1043 dedup[best].ticks = pri;
1045 return 0;