/*
 * Copyright (c) 2013-2015 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@dragonflybsd.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/kernel.h>
37 #include <sys/fcntl.h>
40 #include <sys/namei.h>
41 #include <sys/mount.h>
42 #include <sys/vnode.h>
43 #include <sys/mountctl.h>
44 #include <vm/vm_kern.h>
45 #include <vm/vm_extern.h>
/*
 * Freemap addressing helpers.
 *
 * H2FMBASE()  - base offset of the freemap region of size 2^radix that
 *		 contains key (masks off the low radix bits).
 * H2FMSHIFT() - size, in bytes, of a freemap region of the given radix.
 *
 * (Fixed extraction artifact: stray line numbers fused into the macro text.)
 */
#define H2FMBASE(key, radix)	((key) & ~(((hammer2_off_t)1 << (radix)) - 1))
#define H2FMSHIFT(radix)	((hammer2_off_t)1 << (radix))
53 * breadth-first search
55 typedef struct hammer2_chain_save
{
56 TAILQ_ENTRY(hammer2_chain_save
) entry
;
57 hammer2_chain_t
*chain
;
59 } hammer2_chain_save_t
;
/* List head for the deferred-chain queue (see hammer2_chain_save). */
TAILQ_HEAD(hammer2_chain_save_list, hammer2_chain_save);
typedef struct hammer2_chain_save_list hammer2_chain_save_list_t;
64 typedef struct hammer2_bulkfree_info
{
67 hammer2_off_t sbase
; /* sub-loop iteration */
69 hammer2_bmap_data_t
*bmap
;
71 long count_10_00
; /* staged->free */
72 long count_11_10
; /* allocated->staged */
73 long count_00_11
; /* (should not happen) */
74 long count_01_11
; /* (should not happen) */
75 long count_10_11
; /* staged->allocated */
77 long count_linadjusts
;
78 long count_inodes_scanned
;
79 long count_dedup_factor
;
81 hammer2_off_t adj_free
;
83 hammer2_tid_t saved_mirror_tid
;
85 hammer2_chain_save_list_t list
;
86 hammer2_dedup_t
*dedup
;
88 } hammer2_bulkfree_info_t
;
90 static int h2_bulkfree_test(hammer2_bulkfree_info_t
*info
,
91 hammer2_blockref_t
*bref
, int pri
);
94 * General bulk scan function with callback. Called with a referenced
95 * but UNLOCKED parent. The parent is returned in the same state.
99 hammer2_bulk_scan(hammer2_chain_t
*parent
,
100 int (*func
)(hammer2_bulkfree_info_t
*info
,
101 hammer2_blockref_t
*bref
),
102 hammer2_bulkfree_info_t
*info
)
104 hammer2_blockref_t bref
;
105 hammer2_chain_t
*chain
;
106 int cache_index
= -1;
112 hammer2_chain_lock(parent
, HAMMER2_RESOLVE_ALWAYS
|
113 HAMMER2_RESOLVE_SHARED
);
117 * Generally loop on the contents if we have not been flagged
120 * Remember that these chains are completely isolated from
121 * the frontend, so we can release locks temporarily without
124 while ((doabort
& HAMMER2_BULK_ABORT
) == 0 &&
125 hammer2_chain_scan(parent
, &chain
, &bref
, &first
,
127 HAMMER2_LOOKUP_NODATA
|
128 HAMMER2_LOOKUP_SHARED
) != NULL
) {
130 * Process bref, chain is only non-NULL if the bref
131 * might be recursable (its possible that we sometimes get
132 * a non-NULL chain where the bref cannot be recursed).
135 kprintf("SCAN %016jx\n", bref
.data_off
);
136 int xerr
= tsleep(&info
->pri
, PCATCH
, "slp", hz
/ 10);
137 if (xerr
== EINTR
|| xerr
== ERESTART
) {
138 doabort
|= HAMMER2_BULK_ABORT
;
142 if (h2_bulkfree_test(info
, &bref
, 1))
145 doabort
|= func(info
, &bref
);
147 if (doabort
& HAMMER2_BULK_ABORT
)
151 * A non-null chain is always returned if it is
152 * recursive, otherwise a non-null chain might be
153 * returned but usually is not when not recursive.
159 * Else check type and setup depth-first scan.
161 * Account for bytes actually read.
163 info
->bytes_scanned
+= chain
->bytes
;
165 switch(chain
->bref
.type
) {
166 case HAMMER2_BREF_TYPE_INODE
:
167 case HAMMER2_BREF_TYPE_FREEMAP_NODE
:
168 case HAMMER2_BREF_TYPE_INDIRECT
:
169 case HAMMER2_BREF_TYPE_VOLUME
:
170 case HAMMER2_BREF_TYPE_FREEMAP
:
172 if (info
->depth
> 16) {
173 hammer2_chain_save_t
*save
;
174 save
= kmalloc(sizeof(*save
), M_HAMMER2
,
177 hammer2_chain_ref(chain
);
178 TAILQ_INSERT_TAIL(&info
->list
, save
, entry
);
183 int savepri
= info
->pri
;
185 hammer2_chain_unlock(chain
);
187 doabort
|= hammer2_bulk_scan(chain
, func
, info
);
188 info
->pri
+= savepri
;
189 hammer2_chain_lock(chain
,
190 HAMMER2_RESOLVE_ALWAYS
|
191 HAMMER2_RESOLVE_SHARED
);
196 /* does not recurse */
201 hammer2_chain_unlock(chain
);
202 hammer2_chain_drop(chain
);
206 * Save with higher pri now that we know what it is.
208 h2_bulkfree_test(info
, &parent
->bref
, info
->pri
+ 1);
210 hammer2_chain_unlock(parent
);
/*
 * Bulkfree pass:
 *
 *	Chain flush (partial synchronization)			XXX removed
 *	Scan the whole topology - build in-memory freemap (mark 11)
 *	Reconcile the in-memory freemap against the on-disk freemap.
 *		ondisk xx -> ondisk 11 (if allocated)
 *		ondisk 11 -> ondisk 10 (if free in-memory)
 *		ondisk 10 -> ondisk 00 (if free in-memory) - on next pass
 *
 * The topology scan may have to be performed multiple times to window
 * freemaps which are too large to fit in kernel memory.
 *
 * Races are handled using a double-transition (11->10, 10->00).  The bulkfree
 * scan snapshots the volume root's blockset and thus can run concurrent with
 * normal operations, as long as a full flush is made between each pass to
 * synchronize any modified chains (otherwise their blocks might be improperly
 * freed).
 *
 * Temporary memory in multiples of 64KB is required to reconstruct the leaf
 * hammer2_bmap_data blocks so they can later be compared against the live
 * freemap.  Each 64KB block represents 128 x 16KB x 1024 = ~2 GB of storage.
 * A 32MB save area thus represents around ~1 TB.  The temporary memory
 * allocated can be specified.  If it is not sufficient multiple topology
 * passes will be made.
 */

/*
 * Bulkfree callback info
 */
247 static void hammer2_bulkfree_thread(void *arg __unused
);
248 static void cbinfo_bmap_init(hammer2_bulkfree_info_t
*cbinfo
, size_t size
);
249 static int h2_bulkfree_callback(hammer2_bulkfree_info_t
*cbinfo
,
250 hammer2_blockref_t
*bref
);
251 static void h2_bulkfree_sync(hammer2_bulkfree_info_t
*cbinfo
);
252 static void h2_bulkfree_sync_adjust(hammer2_bulkfree_info_t
*cbinfo
,
253 hammer2_off_t data_off
, hammer2_bmap_data_t
*live
,
254 hammer2_bmap_data_t
*bmap
, int nofree
);
257 hammer2_bulkfree_init(hammer2_dev_t
*hmp
)
259 hammer2_thr_create(&hmp
->bfthr
, NULL
, hmp
,
260 hmp
->devrepname
, -1, -1,
261 hammer2_bulkfree_thread
);
265 hammer2_bulkfree_uninit(hammer2_dev_t
*hmp
)
267 hammer2_thr_delete(&hmp
->bfthr
);
271 hammer2_bulkfree_thread(void *arg
)
273 hammer2_thread_t
*thr
= arg
;
274 hammer2_ioc_bulkfree_t bfi
;
278 hammer2_thr_wait_any(thr
,
279 HAMMER2_THREAD_STOP
|
280 HAMMER2_THREAD_FREEZE
|
281 HAMMER2_THREAD_UNFREEZE
|
282 HAMMER2_THREAD_REMASTER
,
287 if (flags
& HAMMER2_THREAD_STOP
)
289 if (flags
& HAMMER2_THREAD_FREEZE
) {
290 hammer2_thr_signal2(thr
, HAMMER2_THREAD_FROZEN
,
291 HAMMER2_THREAD_FREEZE
);
294 if (flags
& HAMMER2_THREAD_UNFREEZE
) {
295 hammer2_thr_signal2(thr
, 0,
296 HAMMER2_THREAD_FROZEN
|
297 HAMMER2_THREAD_UNFREEZE
);
300 if (flags
& HAMMER2_THREAD_FROZEN
)
302 if (flags
& HAMMER2_THREAD_REMASTER
) {
303 hammer2_thr_signal2(thr
, 0, HAMMER2_THREAD_REMASTER
);
304 bzero(&bfi
, sizeof(bfi
));
305 bfi
.size
= 8192 * 1024;
306 /* hammer2_bulkfree_pass(thr->hmp, &bfi); */
310 hammer2_thr_signal(thr
, HAMMER2_THREAD_STOPPED
);
311 /* structure can go invalid at this point */
315 hammer2_bulkfree_pass(hammer2_dev_t
*hmp
, hammer2_chain_t
*vchain
,
316 hammer2_ioc_bulkfree_t
*bfi
)
318 hammer2_bulkfree_info_t cbinfo
;
319 hammer2_chain_save_t
*save
;
325 * We have to clear the live dedup cache as it might have entries
326 * that are freeable as of now. Any new entries in the dedup cache
327 * made after this point, even if they become freeable, will have
328 * previously been fully allocated and will be protected by the
331 hammer2_dedup_clear(hmp
);
334 * Setup for free pass
336 bzero(&cbinfo
, sizeof(cbinfo
));
337 size
= (bfi
->size
+ HAMMER2_FREEMAP_LEVELN_PSIZE
- 1) &
338 ~(size_t)(HAMMER2_FREEMAP_LEVELN_PSIZE
- 1);
340 cbinfo
.bmap
= kmem_alloc_swapbacked(&cbinfo
.kp
, size
, VM_SUBSYS_HAMMER
);
341 cbinfo
.saved_mirror_tid
= hmp
->voldata
.mirror_tid
;
343 cbinfo
.dedup
= kmalloc(sizeof(*cbinfo
.dedup
) * HAMMER2_DEDUP_HEUR_SIZE
,
344 M_HAMMER2
, M_WAITOK
| M_ZERO
);
347 * Normalize start point to a 2GB boundary. We operate on a
348 * 64KB leaf bitmap boundary which represents 2GB of storage.
350 cbinfo
.sbase
= bfi
->sbase
;
351 if (cbinfo
.sbase
> hmp
->voldata
.volu_size
)
352 cbinfo
.sbase
= hmp
->voldata
.volu_size
;
353 cbinfo
.sbase
&= ~HAMMER2_FREEMAP_LEVEL1_MASK
;
354 TAILQ_INIT(&cbinfo
.list
);
357 * Loop on a full meta-data scan as many times as required to
358 * get through all available storage.
360 while (cbinfo
.sbase
< hmp
->voldata
.volu_size
) {
362 * We have enough ram to represent (incr) bytes of storage.
363 * Each 64KB of ram represents 2GB of storage.
365 cbinfo_bmap_init(&cbinfo
, size
);
366 incr
= size
/ HAMMER2_FREEMAP_LEVELN_PSIZE
*
367 HAMMER2_FREEMAP_LEVEL1_SIZE
;
368 if (hmp
->voldata
.volu_size
- cbinfo
.sbase
< incr
)
369 cbinfo
.sstop
= hmp
->voldata
.volu_size
;
371 cbinfo
.sstop
= cbinfo
.sbase
+ incr
;
372 if (hammer2_debug
& 1) {
373 kprintf("bulkfree pass %016jx/%jdGB\n",
374 (intmax_t)cbinfo
.sbase
,
375 (intmax_t)incr
/ HAMMER2_FREEMAP_LEVEL1_SIZE
);
379 * Scan topology for stuff inside this range.
381 hammer2_trans_init(hmp
->spmp
, 0);
382 cbinfo
.mtid
= hammer2_trans_sub(hmp
->spmp
);
384 doabort
|= hammer2_bulk_scan(vchain
, h2_bulkfree_callback
,
387 while ((save
= TAILQ_FIRST(&cbinfo
.list
)) != NULL
&&
389 TAILQ_REMOVE(&cbinfo
.list
, save
, entry
);
391 doabort
|= hammer2_bulk_scan(save
->chain
,
392 h2_bulkfree_callback
,
394 hammer2_chain_drop(save
->chain
);
395 kfree(save
, M_HAMMER2
);
398 TAILQ_REMOVE(&cbinfo
.list
, save
, entry
);
399 hammer2_chain_drop(save
->chain
);
400 kfree(save
, M_HAMMER2
);
401 save
= TAILQ_FIRST(&cbinfo
.list
);
404 kprintf("bulkfree lastdrop %d %d doabort=%d\n",
405 vchain
->refs
, vchain
->core
.chain_count
, doabort
);
408 * If complete scan succeeded we can synchronize our
409 * in-memory freemap against live storage. If an abort
410 * did occur we cannot safely synchronize our partially
411 * filled-out in-memory freemap.
414 h2_bulkfree_sync(&cbinfo
);
416 hammer2_voldata_lock(hmp
);
417 hammer2_voldata_modify(hmp
);
418 hmp
->voldata
.allocator_free
+= cbinfo
.adj_free
;
419 hammer2_voldata_unlock(hmp
);
423 * Cleanup for next loop.
425 hammer2_trans_done(hmp
->spmp
);
428 cbinfo
.sbase
= cbinfo
.sstop
;
431 kmem_free_swapbacked(&cbinfo
.kp
);
432 kfree(cbinfo
.dedup
, M_HAMMER2
);
435 bfi
->sstop
= cbinfo
.sbase
;
437 incr
= bfi
->sstop
/ (hmp
->voldata
.volu_size
/ 10000);
441 kprintf("bulkfree pass statistics (%d.%02d%% storage processed):\n",
445 kprintf(" transition->free %ld\n", cbinfo
.count_10_00
);
446 kprintf(" transition->staged %ld\n", cbinfo
.count_11_10
);
447 kprintf(" ERR(00)->allocated %ld\n", cbinfo
.count_00_11
);
448 kprintf(" ERR(01)->allocated %ld\n", cbinfo
.count_01_11
);
449 kprintf(" staged->allocated %ld\n", cbinfo
.count_10_11
);
450 kprintf(" ~2MB segs cleaned %ld\n", cbinfo
.count_l0cleans
);
451 kprintf(" linear adjusts %ld\n", cbinfo
.count_linadjusts
);
452 kprintf(" dedup factor %ld\n", cbinfo
.count_dedup_factor
);
458 cbinfo_bmap_init(hammer2_bulkfree_info_t
*cbinfo
, size_t size
)
460 hammer2_bmap_data_t
*bmap
= cbinfo
->bmap
;
461 hammer2_key_t key
= cbinfo
->sbase
;
465 lokey
= (cbinfo
->hmp
->voldata
.allocator_beg
+ HAMMER2_SEGMASK64
) &
467 hikey
= cbinfo
->hmp
->voldata
.volu_size
& ~HAMMER2_SEGMASK64
;
471 if (lokey
< H2FMBASE(key
, HAMMER2_FREEMAP_LEVEL1_RADIX
) +
472 HAMMER2_ZONE_SEG64
) {
473 lokey
= H2FMBASE(key
, HAMMER2_FREEMAP_LEVEL1_RADIX
) +
476 if (key
< lokey
|| key
>= hikey
) {
477 memset(bmap
->bitmapq
, -1,
478 sizeof(bmap
->bitmapq
));
480 bmap
->linear
= HAMMER2_SEGSIZE
;
482 bmap
->avail
= H2FMSHIFT(HAMMER2_FREEMAP_LEVEL0_RADIX
);
484 size
-= sizeof(*bmap
);
485 key
+= HAMMER2_FREEMAP_LEVEL0_SIZE
;
491 h2_bulkfree_callback(hammer2_bulkfree_info_t
*cbinfo
, hammer2_blockref_t
*bref
)
493 hammer2_bmap_data_t
*bmap
;
494 hammer2_off_t data_off
;
501 * Check for signal and allow yield to userland during scan
503 if (hammer2_signal_check(&cbinfo
->save_time
))
504 return HAMMER2_BULK_ABORT
;
505 if (bref
->type
== HAMMER2_BREF_TYPE_INODE
) {
506 ++cbinfo
->count_inodes_scanned
;
507 if ((cbinfo
->count_inodes_scanned
& 65535) == 0)
508 kprintf(" inodes %6ld bytes %9ld\n",
509 cbinfo
->count_inodes_scanned
,
510 cbinfo
->bytes_scanned
);
514 * Calculate the data offset and determine if it is within
515 * the current freemap range being gathered.
518 data_off
= bref
->data_off
& ~HAMMER2_OFF_MASK_RADIX
;
519 if (data_off
< cbinfo
->sbase
|| data_off
>= cbinfo
->sstop
)
521 if (data_off
< cbinfo
->hmp
->voldata
.allocator_beg
)
523 if (data_off
>= cbinfo
->hmp
->voldata
.volu_size
)
527 * Calculate the information needed to generate the in-memory
530 * Hammer2 does not allow allocations to cross the L1 (2GB) boundary,
531 * it's a problem if it does. (Or L0 (2MB) for that matter).
533 radix
= (int)(bref
->data_off
& HAMMER2_OFF_MASK_RADIX
);
534 KKASSERT(radix
!= 0);
535 bytes
= (size_t)1 << radix
;
536 class = (bref
->type
<< 8) | hammer2_devblkradix(radix
);
538 if (data_off
+ bytes
>= cbinfo
->sstop
) {
539 kprintf("hammer2_bulkfree_scan: illegal 2GB boundary "
540 "%016jx %016jx/%d\n",
541 (intmax_t)bref
->data_off
,
544 bytes
= cbinfo
->sstop
- data_off
; /* XXX */
548 * Convert to a storage offset relative to the beginning of the
549 * storage range we are collecting. Then lookup the level0 bmap entry.
551 data_off
-= cbinfo
->sbase
;
552 bmap
= cbinfo
->bmap
+ (data_off
>> HAMMER2_FREEMAP_LEVEL0_RADIX
);
555 * Convert data_off to a bmap-relative value (~2MB storage range).
556 * Adjust linear, class, and avail.
558 * Hammer2 does not allow allocations to cross the L0 (2MB) boundary,
560 data_off
&= HAMMER2_FREEMAP_LEVEL0_MASK
;
561 if (data_off
+ bytes
> HAMMER2_FREEMAP_LEVEL0_SIZE
) {
562 kprintf("hammer2_bulkfree_scan: illegal 2MB boundary "
563 "%016jx %016jx/%d\n",
564 (intmax_t)bref
->data_off
,
567 bytes
= HAMMER2_FREEMAP_LEVEL0_SIZE
- data_off
;
570 if (bmap
->class == 0) {
572 bmap
->avail
= HAMMER2_FREEMAP_LEVEL0_SIZE
;
574 if (bmap
->class != class) {
575 kprintf("hammer2_bulkfree_scan: illegal mixed class "
576 "%016jx %016jx/%d (%04x vs %04x)\n",
577 (intmax_t)bref
->data_off
,
582 if (bmap
->linear
< (int32_t)data_off
+ (int32_t)bytes
)
583 bmap
->linear
= (int32_t)data_off
+ (int32_t)bytes
;
586 * Adjust the hammer2_bitmap_t bitmap[HAMMER2_BMAP_ELEMENTS].
587 * 64-bit entries, 2 bits per entry, to code 11.
589 * NOTE: The allocation can be smaller than HAMMER2_FREEMAP_BLOCK_SIZE.
593 hammer2_bitmap_t bmask
;
595 bindex
= (int)data_off
>> (HAMMER2_FREEMAP_BLOCK_RADIX
+
596 HAMMER2_BMAP_INDEX_RADIX
);
597 bmask
= (hammer2_bitmap_t
)3 <<
598 ((((int)data_off
& HAMMER2_BMAP_INDEX_MASK
) >>
599 HAMMER2_FREEMAP_BLOCK_RADIX
) << 1);
602 * NOTE! The (avail) calculation is bitmap-granular. Multiple
603 * sub-granular records can wind up at the same bitmap
606 if ((bmap
->bitmapq
[bindex
] & bmask
) == 0) {
607 if (bytes
< HAMMER2_FREEMAP_BLOCK_SIZE
) {
608 bmap
->avail
-= HAMMER2_FREEMAP_BLOCK_SIZE
;
610 bmap
->avail
-= bytes
;
612 bmap
->bitmapq
[bindex
] |= bmask
;
614 data_off
+= HAMMER2_FREEMAP_BLOCK_SIZE
;
615 if (bytes
< HAMMER2_FREEMAP_BLOCK_SIZE
)
618 bytes
-= HAMMER2_FREEMAP_BLOCK_SIZE
;
/*
 * Synchronize the in-memory bitmap with the live freemap.  This is not a
 * direct copy.  Instead the bitmaps must be compared:
 *
 *	In-memory	Live-freemap
 *	   00		  11 -> 10	(do nothing if live modified)
 *			  10 -> 00	(do nothing if live modified)
 *	   11		  10 -> 11	handles race against live
 *			  ** -> 11	nominally warn of corruption
 */
635 h2_bulkfree_sync(hammer2_bulkfree_info_t
*cbinfo
)
637 hammer2_off_t data_off
;
639 hammer2_key_t key_dummy
;
640 hammer2_bmap_data_t
*bmap
;
641 hammer2_bmap_data_t
*live
;
642 hammer2_chain_t
*live_parent
;
643 hammer2_chain_t
*live_chain
;
644 int cache_index
= -1;
647 kprintf("hammer2_bulkfree - range ");
649 if (cbinfo
->sbase
< cbinfo
->hmp
->voldata
.allocator_beg
)
651 (intmax_t)cbinfo
->hmp
->voldata
.allocator_beg
);
654 (intmax_t)cbinfo
->sbase
);
656 if (cbinfo
->sstop
> cbinfo
->hmp
->voldata
.volu_size
)
658 (intmax_t)cbinfo
->hmp
->voldata
.volu_size
);
661 (intmax_t)cbinfo
->sstop
);
663 data_off
= cbinfo
->sbase
;
666 live_parent
= &cbinfo
->hmp
->fchain
;
667 hammer2_chain_ref(live_parent
);
668 hammer2_chain_lock(live_parent
, HAMMER2_RESOLVE_ALWAYS
);
672 * Iterate each hammer2_bmap_data_t line (128 bytes) managing
675 while (data_off
< cbinfo
->sstop
) {
677 * The freemap is not used below allocator_beg or beyond
682 if (data_off
< cbinfo
->hmp
->voldata
.allocator_beg
)
684 if (data_off
>= cbinfo
->hmp
->voldata
.volu_size
)
688 * Locate the freemap leaf on the live filesystem
690 key
= (data_off
& ~HAMMER2_FREEMAP_LEVEL1_MASK
);
693 if (live_chain
== NULL
|| live_chain
->bref
.key
!= key
) {
695 hammer2_chain_unlock(live_chain
);
696 hammer2_chain_drop(live_chain
);
698 live_chain
= hammer2_chain_lookup(
702 key
+ HAMMER2_FREEMAP_LEVEL1_MASK
,
704 HAMMER2_LOOKUP_ALWAYS
);
708 * If recent allocations were made we avoid races by
709 * not staging or freeing any blocks. We can still
710 * remark blocks as fully allocated.
713 if (hammer2_debug
& 1) {
714 kprintf("live_chain %016jx\n",
717 if (live_chain
->bref
.mirror_tid
>
718 cbinfo
->saved_mirror_tid
) {
719 kprintf("hammer2_bulkfree: "
729 if (live_chain
== NULL
) {
731 * XXX if we implement a full recovery mode we need
732 * to create/recreate missing freemap chains if our
733 * bmap has any allocated blocks.
736 bmap
->avail
!= HAMMER2_FREEMAP_LEVEL0_SIZE
) {
737 kprintf("hammer2_bulkfree: cannot locate "
738 "live leaf for allocated data "
744 if (live_chain
->error
) {
745 kprintf("hammer2_bulkfree: error %s looking up "
746 "live leaf for allocated data near %016jx\n",
747 hammer2_error_str(live_chain
->error
),
749 hammer2_chain_unlock(live_chain
);
750 hammer2_chain_drop(live_chain
);
755 bmapindex
= (data_off
& HAMMER2_FREEMAP_LEVEL1_MASK
) >>
756 HAMMER2_FREEMAP_LEVEL0_RADIX
;
757 live
= &live_chain
->data
->bmdata
[bmapindex
];
760 * TODO - we could shortcut this by testing that both
761 * live->class and bmap->class are 0, and both avails are
762 * set to HAMMER2_FREEMAP_LEVEL0_SIZE (4MB).
764 if (bcmp(live
->bitmapq
, bmap
->bitmapq
,
765 sizeof(bmap
->bitmapq
)) == 0) {
768 if (hammer2_debug
& 1) {
769 kprintf("live %016jx %04d.%04x (avail=%d)\n",
770 data_off
, bmapindex
, live
->class, live
->avail
);
773 hammer2_chain_modify(live_chain
, cbinfo
->mtid
, 0, 0);
774 live
= &live_chain
->data
->bmdata
[bmapindex
];
776 h2_bulkfree_sync_adjust(cbinfo
, data_off
, live
, bmap
, nofree
);
778 data_off
+= HAMMER2_FREEMAP_LEVEL0_SIZE
;
782 hammer2_chain_unlock(live_chain
);
783 hammer2_chain_drop(live_chain
);
786 hammer2_chain_unlock(live_parent
);
787 hammer2_chain_drop(live_parent
);
792 * When bulkfree is finally able to free a block it must make sure that
793 * the INVALOK bit in any cached DIO is cleared prior to the block being
798 fixup_dio(hammer2_dev_t
*hmp
, hammer2_off_t data_off
, int bindex
, int scount
)
800 data_off
+= (scount
>> 1) * HAMMER2_FREEMAP_BLOCK_SIZE
;
802 (HAMMER2_FREEMAP_BLOCK_SIZE
* HAMMER2_BMAP_BLOCKS_PER_ELEMENT
);
803 hammer2_io_resetinval(hmp
, data_off
);
/*
 * Merge the bulkfree bitmap against the existing bitmap.
 *
 * If nofree is non-zero the merge will only mark free blocks as allocated
 * and will refuse to free any blocks.
 */
814 h2_bulkfree_sync_adjust(hammer2_bulkfree_info_t
*cbinfo
,
815 hammer2_off_t data_off
, hammer2_bmap_data_t
*live
,
816 hammer2_bmap_data_t
*bmap
, int nofree
)
820 hammer2_bitmap_t lmask
;
821 hammer2_bitmap_t mmask
;
823 for (bindex
= 0; bindex
< HAMMER2_BMAP_ELEMENTS
; ++bindex
) {
824 lmask
= live
->bitmapq
[bindex
]; /* live */
825 mmask
= bmap
->bitmapq
[bindex
]; /* snapshotted bulkfree */
830 scount
< HAMMER2_BMAP_BITS_PER_ELEMENT
;
832 if ((mmask
& 3) == 0) {
834 * in-memory 00 live 11 -> 10
837 * Storage might be marked allocated or
838 * staged and must be remarked staged or
845 kprintf("hammer2_bulkfree: cannot "
846 "transition m=00/l=01\n");
848 case 2: /* 10 -> 00 */
851 live
->bitmapq
[bindex
] &=
852 ~((hammer2_bitmap_t
)2 << scount
);
854 HAMMER2_FREEMAP_BLOCK_SIZE
;
856 HAMMER2_FREEMAP_LEVEL0_SIZE
) {
858 HAMMER2_FREEMAP_LEVEL0_SIZE
;
861 HAMMER2_FREEMAP_BLOCK_SIZE
;
862 ++cbinfo
->count_10_00
;
864 case 3: /* 11 -> 10 */
865 live
->bitmapq
[bindex
] &=
866 ~((hammer2_bitmap_t
)1 << scount
);
867 ++cbinfo
->count_11_10
;
868 fixup_dio(cbinfo
->hmp
, data_off
,
872 } else if ((mmask
& 3) == 3) {
874 * in-memory 11 live 10 -> 11
877 * Storage might be incorrectly marked free
878 * or staged and must be remarked fully
883 ++cbinfo
->count_00_11
;
885 HAMMER2_FREEMAP_BLOCK_SIZE
;
887 HAMMER2_FREEMAP_BLOCK_SIZE
;
888 if ((int32_t)live
->avail
< 0)
892 ++cbinfo
->count_01_11
;
894 case 2: /* 10 -> 11 */
895 ++cbinfo
->count_10_11
;
900 live
->bitmapq
[bindex
] |=
901 ((hammer2_bitmap_t
)3 << scount
);
909 * Determine if the live bitmap is completely free and reset its
910 * fields if so. Otherwise check to see if we can reduce the linear
913 for (bindex
= HAMMER2_BMAP_ELEMENTS
- 1; bindex
>= 0; --bindex
) {
914 if (live
->bitmapq
[bindex
] != 0)
919 } else if (bindex
< 0) {
920 live
->avail
= HAMMER2_FREEMAP_LEVEL0_SIZE
;
923 ++cbinfo
->count_l0cleans
;
924 } else if (bindex
< 7) {
926 if (live
->linear
> bindex
* HAMMER2_FREEMAP_BLOCK_SIZE
) {
927 live
->linear
= bindex
* HAMMER2_FREEMAP_BLOCK_SIZE
;
928 ++cbinfo
->count_linadjusts
;
932 * XXX this fine-grained measure still has some issues.
934 if (live
->linear
< bindex
* HAMMER2_FREEMAP_BLOCK_SIZE
) {
935 live
->linear
= bindex
* HAMMER2_FREEMAP_BLOCK_SIZE
;
936 ++cbinfo
->count_linadjusts
;
939 live
->linear
= HAMMER2_SEGSIZE
;
944 kprintf("%016jx %04d.%04x (avail=%7d) "
945 "%08x %08x %08x %08x %08x %08x %08x %08x\n",
948 HAMMER2_FREEMAP_LEVEL1_MASK
) >>
949 HAMMER2_FREEMAP_LEVEL0_RADIX
),
952 bmap
->bitmap
[0], bmap
->bitmap
[1],
953 bmap
->bitmap
[2], bmap
->bitmap
[3],
954 bmap
->bitmap
[4], bmap
->bitmap
[5],
955 bmap
->bitmap
[6], bmap
->bitmap
[7]);
/*
 * BULKFREE DEDUP HEURISTIC
 *
 * WARNING! This code is SMP safe but the heuristic allows SMP collisions.
 *	    All fields must be loaded into locals and validated.
 */
968 h2_bulkfree_test(hammer2_bulkfree_info_t
*cbinfo
, hammer2_blockref_t
*bref
,
971 hammer2_dedup_t
*dedup
;
976 n
= hammer2_icrc32(&bref
->data_off
, sizeof(bref
->data_off
));
977 dedup
= cbinfo
->dedup
+ (n
& (HAMMER2_DEDUP_HEUR_MASK
& ~7));
979 for (i
= best
= 0; i
< 8; ++i
) {
980 if (dedup
[i
].data_off
== bref
->data_off
) {
981 if (dedup
[i
].ticks
< pri
)
982 dedup
[i
].ticks
= pri
;
984 cbinfo
->count_dedup_factor
+= dedup
[i
].ticks
;
987 if (dedup
[i
].ticks
< dedup
[best
].ticks
)
990 dedup
[best
].data_off
= bref
->data_off
;
991 dedup
[best
].ticks
= pri
;