2 * Copyright (c) 2008 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38 #include <vm/vm_page2.h>
42 static int hammer_res_rb_compare(hammer_reserve_t res1
, hammer_reserve_t res2
);
43 static void hammer_reserve_setdelay_offset(hammer_mount_t hmp
,
44 hammer_off_t base_offset
, int zone
,
45 hammer_blockmap_layer2_t layer2
);
46 static void hammer_reserve_setdelay(hammer_mount_t hmp
, hammer_reserve_t resv
);
47 static int hammer_check_volume(hammer_mount_t
, hammer_off_t
*);
48 static void hammer_skip_volume(hammer_off_t
*offsetp
);
51 * Reserved big-blocks red-black tree support
53 RB_GENERATE2(hammer_res_rb_tree
, hammer_reserve
, rb_node
,
54 hammer_res_rb_compare
, hammer_off_t
, zone_offset
);
57 hammer_res_rb_compare(hammer_reserve_t res1
, hammer_reserve_t res2
)
59 if (res1
->zone_offset
< res2
->zone_offset
)
61 if (res1
->zone_offset
> res2
->zone_offset
)
67 * Allocate bytes from a zone
70 hammer_blockmap_alloc(hammer_transaction_t trans
, int zone
, int bytes
,
71 hammer_off_t hint
, int *errorp
)
74 hammer_volume_t root_volume
;
75 hammer_blockmap_t blockmap
;
76 hammer_blockmap_t freemap
;
77 hammer_reserve_t resv
;
78 hammer_blockmap_layer1_t layer1
;
79 hammer_blockmap_layer2_t layer2
;
80 hammer_buffer_t buffer1
= NULL
;
81 hammer_buffer_t buffer2
= NULL
;
82 hammer_buffer_t buffer3
= NULL
;
83 hammer_off_t tmp_offset
;
84 hammer_off_t next_offset
;
85 hammer_off_t result_offset
;
86 hammer_off_t layer1_offset
;
87 hammer_off_t layer2_offset
;
88 hammer_off_t base_off
;
90 int offset
; /* offset within big-block */
96 * Deal with alignment and buffer-boundary issues.
98 * Be careful, certain primary alignments are used below to allocate
99 * new blockmap blocks.
101 bytes
= HAMMER_DATA_DOALIGN(bytes
);
102 KKASSERT(bytes
> 0 && bytes
<= HAMMER_XBUFSIZE
);
103 KKASSERT(hammer_is_index_record(zone
));
108 root_volume
= trans
->rootvol
;
110 blockmap
= &hmp
->blockmap
[zone
];
111 freemap
= &hmp
->blockmap
[HAMMER_ZONE_FREEMAP_INDEX
];
112 KKASSERT(HAMMER_ZONE_DECODE(blockmap
->next_offset
) == zone
);
115 * Use the hint if we have one.
117 if (hint
&& HAMMER_ZONE_DECODE(hint
) == zone
) {
118 next_offset
= HAMMER_DATA_DOALIGN_WITH(hammer_off_t
, hint
);
121 next_offset
= blockmap
->next_offset
;
127 * use_hint is turned off if we leave the hinted big-block.
129 if (use_hint
&& ((next_offset
^ hint
) & ~HAMMER_HINTBLOCK_MASK64
)) {
130 next_offset
= blockmap
->next_offset
;
137 if (next_offset
== HAMMER_ZONE_ENCODE(zone
+ 1, 0)) {
139 hmkprintf(hmp
, "No space left for zone %d "
140 "allocation\n", zone
);
145 next_offset
= HAMMER_ZONE_ENCODE(zone
, 0);
149 * The allocation request may not cross a buffer boundary. Special
150 * large allocations must not cross a big-block boundary.
152 tmp_offset
= next_offset
+ bytes
- 1;
153 if (bytes
<= HAMMER_BUFSIZE
) {
154 if ((next_offset
^ tmp_offset
) & ~HAMMER_BUFMASK64
) {
155 next_offset
= tmp_offset
& ~HAMMER_BUFMASK64
;
159 if ((next_offset
^ tmp_offset
) & ~HAMMER_BIGBLOCK_MASK64
) {
160 next_offset
= tmp_offset
& ~HAMMER_BIGBLOCK_MASK64
;
164 offset
= (int)next_offset
& HAMMER_BIGBLOCK_MASK
;
169 layer1_offset
= freemap
->phys_offset
+
170 HAMMER_BLOCKMAP_LAYER1_OFFSET(next_offset
);
172 layer1
= hammer_bread(hmp
, layer1_offset
, errorp
, &buffer1
);
181 if (!hammer_crc_test_layer1(hmp
->version
, layer1
)) {
182 hammer_lock_ex(&hmp
->blkmap_lock
);
183 if (!hammer_crc_test_layer1(hmp
->version
, layer1
))
184 hpanic("CRC FAILED: LAYER1");
185 hammer_unlock(&hmp
->blkmap_lock
);
189 * If we are at a big-block boundary and layer1 indicates no
190 * free big-blocks, then we cannot allocate a new big-block in
191 * layer2, skip to the next layer1 entry.
193 if (offset
== 0 && layer1
->blocks_free
== 0) {
194 next_offset
= HAMMER_ZONE_LAYER1_NEXT_OFFSET(next_offset
);
195 if (hammer_check_volume(hmp
, &next_offset
)) {
201 KKASSERT(layer1
->phys_offset
!= HAMMER_BLOCKMAP_UNAVAIL
);
204 * Skip the whole volume if it is pointing to a layer2 big-block
205 * on a volume that we are currently trying to remove from the
206 * file-system. This is used by the volume-del code together with
207 * the reblocker to free up a volume.
209 if (HAMMER_VOL_DECODE(layer1
->phys_offset
) == hmp
->volume_to_remove
) {
210 hammer_skip_volume(&next_offset
);
215 * Dive layer 2, each entry represents a big-block.
217 layer2_offset
= layer1
->phys_offset
+
218 HAMMER_BLOCKMAP_LAYER2_OFFSET(next_offset
);
219 layer2
= hammer_bread(hmp
, layer2_offset
, errorp
, &buffer2
);
226 * Check CRC. This can race another thread holding the lock
227 * and in the middle of modifying layer2.
229 if (!hammer_crc_test_layer2(hmp
->version
, layer2
)) {
230 hammer_lock_ex(&hmp
->blkmap_lock
);
231 if (!hammer_crc_test_layer2(hmp
->version
, layer2
))
232 hpanic("CRC FAILED: LAYER2");
233 hammer_unlock(&hmp
->blkmap_lock
);
237 * Skip the layer if the zone is owned by someone other then us.
239 if (layer2
->zone
&& layer2
->zone
!= zone
) {
240 next_offset
+= (HAMMER_BIGBLOCK_SIZE
- offset
);
243 if (offset
< layer2
->append_off
) {
244 next_offset
+= layer2
->append_off
- offset
;
250 * If operating in the current non-hint blockmap block, do not
251 * allow it to get over-full. Also drop any active hinting so
252 * blockmap->next_offset is updated at the end.
254 * We do this for B-Tree and meta-data allocations to provide
255 * localization for updates.
257 if ((zone
== HAMMER_ZONE_BTREE_INDEX
||
258 zone
== HAMMER_ZONE_META_INDEX
) &&
259 offset
>= HAMMER_BIGBLOCK_OVERFILL
&&
260 !((next_offset
^ blockmap
->next_offset
) & ~HAMMER_BIGBLOCK_MASK64
)) {
261 if (offset
>= HAMMER_BIGBLOCK_OVERFILL
) {
262 next_offset
+= (HAMMER_BIGBLOCK_SIZE
- offset
);
270 * We need the lock from this point on. We have to re-check zone
271 * ownership after acquiring the lock and also check for reservations.
273 hammer_lock_ex(&hmp
->blkmap_lock
);
275 if (layer2
->zone
&& layer2
->zone
!= zone
) {
276 hammer_unlock(&hmp
->blkmap_lock
);
277 next_offset
+= (HAMMER_BIGBLOCK_SIZE
- offset
);
280 if (offset
< layer2
->append_off
) {
281 hammer_unlock(&hmp
->blkmap_lock
);
282 next_offset
+= layer2
->append_off
- offset
;
287 * The big-block might be reserved by another zone. If it is reserved
288 * by our zone we may have to move next_offset past the append_off.
290 base_off
= hammer_xlate_to_zone2(next_offset
& ~HAMMER_BIGBLOCK_MASK64
);
291 resv
= RB_LOOKUP(hammer_res_rb_tree
, &hmp
->rb_resv_root
, base_off
);
293 if (resv
->zone
!= zone
) {
294 hammer_unlock(&hmp
->blkmap_lock
);
295 next_offset
= HAMMER_ZONE_LAYER2_NEXT_OFFSET(next_offset
);
298 if (offset
< resv
->append_off
) {
299 hammer_unlock(&hmp
->blkmap_lock
);
300 next_offset
+= resv
->append_off
- offset
;
307 * Ok, we can allocate out of this layer2 big-block. Assume ownership
308 * of the layer for real. At this point we've validated any
309 * reservation that might exist and can just ignore resv.
311 if (layer2
->zone
== 0) {
313 * Assign the big-block to our zone
315 hammer_modify_buffer(trans
, buffer1
, layer1
, sizeof(*layer1
));
316 --layer1
->blocks_free
;
317 hammer_crc_set_layer1(hmp
->version
, layer1
);
318 hammer_modify_buffer_done(buffer1
);
319 hammer_modify_buffer(trans
, buffer2
, layer2
, sizeof(*layer2
));
321 KKASSERT(layer2
->bytes_free
== HAMMER_BIGBLOCK_SIZE
);
322 KKASSERT(layer2
->append_off
== 0);
323 hammer_modify_volume_field(trans
, trans
->rootvol
,
324 vol0_stat_freebigblocks
);
325 --root_volume
->ondisk
->vol0_stat_freebigblocks
;
326 hmp
->copy_stat_freebigblocks
=
327 root_volume
->ondisk
->vol0_stat_freebigblocks
;
328 hammer_modify_volume_done(trans
->rootvol
);
330 hammer_modify_buffer(trans
, buffer2
, layer2
, sizeof(*layer2
));
332 KKASSERT(layer2
->zone
== zone
);
335 * NOTE: bytes_free can legally go negative due to de-dup.
337 layer2
->bytes_free
-= bytes
;
338 KKASSERT(layer2
->append_off
<= offset
);
339 layer2
->append_off
= offset
+ bytes
;
340 hammer_crc_set_layer2(hmp
->version
, layer2
);
341 hammer_modify_buffer_done(buffer2
);
344 * We hold the blockmap lock and should be the only ones
345 * capable of modifying resv->append_off. Track the allocation
348 KKASSERT(bytes
!= 0);
350 KKASSERT(resv
->append_off
<= offset
);
351 resv
->append_off
= offset
+ bytes
;
352 resv
->flags
&= ~HAMMER_RESF_LAYER2FREE
;
353 hammer_blockmap_reserve_complete(hmp
, resv
);
357 * If we are allocating from the base of a new buffer we can avoid
358 * a disk read by calling hammer_bnew_ext().
360 if ((next_offset
& HAMMER_BUFMASK
) == 0) {
361 hammer_bnew_ext(trans
->hmp
, next_offset
, bytes
,
368 result_offset
= next_offset
;
371 * If we weren't supplied with a hint or could not use the hint
372 * then we wound up using blockmap->next_offset as the hint and
376 hammer_modify_volume_noundo(NULL
, root_volume
);
377 blockmap
->next_offset
= next_offset
+ bytes
;
378 hammer_modify_volume_done(root_volume
);
380 hammer_unlock(&hmp
->blkmap_lock
);
387 hammer_rel_buffer(buffer1
, 0);
389 hammer_rel_buffer(buffer2
, 0);
391 hammer_rel_buffer(buffer3
, 0);
393 return(result_offset
);
397 * Frontend function - Reserve bytes in a zone.
399 * This code reserves bytes out of a blockmap without committing to any
400 * meta-data modifications, allowing the front-end to directly issue disk
401 * write I/O for big-blocks of data
403 * The backend later finalizes the reservation with hammer_blockmap_finalize()
404 * upon committing the related record.
407 hammer_blockmap_reserve(hammer_mount_t hmp
, int zone
, int bytes
,
408 hammer_off_t
*zone_offp
, int *errorp
)
410 hammer_volume_t root_volume
;
411 hammer_blockmap_t blockmap
;
412 hammer_blockmap_t freemap
;
413 hammer_blockmap_layer1_t layer1
;
414 hammer_blockmap_layer2_t layer2
;
415 hammer_buffer_t buffer1
= NULL
;
416 hammer_buffer_t buffer2
= NULL
;
417 hammer_buffer_t buffer3
= NULL
;
418 hammer_off_t tmp_offset
;
419 hammer_off_t next_offset
;
420 hammer_off_t layer1_offset
;
421 hammer_off_t layer2_offset
;
422 hammer_off_t base_off
;
423 hammer_reserve_t resv
;
424 hammer_reserve_t resx
= NULL
;
431 KKASSERT(hammer_is_index_record(zone
));
432 root_volume
= hammer_get_root_volume(hmp
, errorp
);
435 blockmap
= &hmp
->blockmap
[zone
];
436 freemap
= &hmp
->blockmap
[HAMMER_ZONE_FREEMAP_INDEX
];
437 KKASSERT(HAMMER_ZONE_DECODE(blockmap
->next_offset
) == zone
);
440 * Deal with alignment and buffer-boundary issues.
442 * Be careful, certain primary alignments are used below to allocate
443 * new blockmap blocks.
445 bytes
= HAMMER_DATA_DOALIGN(bytes
);
446 KKASSERT(bytes
> 0 && bytes
<= HAMMER_XBUFSIZE
);
448 next_offset
= blockmap
->next_offset
;
454 if (next_offset
== HAMMER_ZONE_ENCODE(zone
+ 1, 0)) {
456 hmkprintf(hmp
, "No space left for zone %d "
457 "reservation\n", zone
);
461 next_offset
= HAMMER_ZONE_ENCODE(zone
, 0);
465 * The allocation request may not cross a buffer boundary. Special
466 * large allocations must not cross a big-block boundary.
468 tmp_offset
= next_offset
+ bytes
- 1;
469 if (bytes
<= HAMMER_BUFSIZE
) {
470 if ((next_offset
^ tmp_offset
) & ~HAMMER_BUFMASK64
) {
471 next_offset
= tmp_offset
& ~HAMMER_BUFMASK64
;
475 if ((next_offset
^ tmp_offset
) & ~HAMMER_BIGBLOCK_MASK64
) {
476 next_offset
= tmp_offset
& ~HAMMER_BIGBLOCK_MASK64
;
480 offset
= (int)next_offset
& HAMMER_BIGBLOCK_MASK
;
485 layer1_offset
= freemap
->phys_offset
+
486 HAMMER_BLOCKMAP_LAYER1_OFFSET(next_offset
);
487 layer1
= hammer_bread(hmp
, layer1_offset
, errorp
, &buffer1
);
494 if (!hammer_crc_test_layer1(hmp
->version
, layer1
)) {
495 hammer_lock_ex(&hmp
->blkmap_lock
);
496 if (!hammer_crc_test_layer1(hmp
->version
, layer1
))
497 hpanic("CRC FAILED: LAYER1");
498 hammer_unlock(&hmp
->blkmap_lock
);
502 * If we are at a big-block boundary and layer1 indicates no
503 * free big-blocks, then we cannot allocate a new big-block in
504 * layer2, skip to the next layer1 entry.
506 if ((next_offset
& HAMMER_BIGBLOCK_MASK
) == 0 &&
507 layer1
->blocks_free
== 0) {
508 next_offset
= HAMMER_ZONE_LAYER1_NEXT_OFFSET(next_offset
);
509 if (hammer_check_volume(hmp
, &next_offset
))
513 KKASSERT(layer1
->phys_offset
!= HAMMER_BLOCKMAP_UNAVAIL
);
516 * Dive layer 2, each entry represents a big-block.
518 layer2_offset
= layer1
->phys_offset
+
519 HAMMER_BLOCKMAP_LAYER2_OFFSET(next_offset
);
520 layer2
= hammer_bread(hmp
, layer2_offset
, errorp
, &buffer2
);
525 * Check CRC if not allocating into uninitialized space (which we
526 * aren't when reserving space).
528 if (!hammer_crc_test_layer2(hmp
->version
, layer2
)) {
529 hammer_lock_ex(&hmp
->blkmap_lock
);
530 if (!hammer_crc_test_layer2(hmp
->version
, layer2
))
531 hpanic("CRC FAILED: LAYER2");
532 hammer_unlock(&hmp
->blkmap_lock
);
536 * Skip the layer if the zone is owned by someone other then us.
538 if (layer2
->zone
&& layer2
->zone
!= zone
) {
539 next_offset
+= (HAMMER_BIGBLOCK_SIZE
- offset
);
542 if (offset
< layer2
->append_off
) {
543 next_offset
+= layer2
->append_off
- offset
;
548 * We need the lock from this point on. We have to re-check zone
549 * ownership after acquiring the lock and also check for reservations.
551 hammer_lock_ex(&hmp
->blkmap_lock
);
553 if (layer2
->zone
&& layer2
->zone
!= zone
) {
554 hammer_unlock(&hmp
->blkmap_lock
);
555 next_offset
+= (HAMMER_BIGBLOCK_SIZE
- offset
);
558 if (offset
< layer2
->append_off
) {
559 hammer_unlock(&hmp
->blkmap_lock
);
560 next_offset
+= layer2
->append_off
- offset
;
565 * The big-block might be reserved by another zone. If it is reserved
566 * by our zone we may have to move next_offset past the append_off.
568 base_off
= hammer_xlate_to_zone2(next_offset
& ~HAMMER_BIGBLOCK_MASK64
);
569 resv
= RB_LOOKUP(hammer_res_rb_tree
, &hmp
->rb_resv_root
, base_off
);
571 if (resv
->zone
!= zone
) {
572 hammer_unlock(&hmp
->blkmap_lock
);
573 next_offset
= HAMMER_ZONE_LAYER2_NEXT_OFFSET(next_offset
);
576 if (offset
< resv
->append_off
) {
577 hammer_unlock(&hmp
->blkmap_lock
);
578 next_offset
+= resv
->append_off
- offset
;
583 resx
= kmalloc(sizeof(*resv
), hmp
->m_misc
,
584 M_WAITOK
| M_ZERO
| M_USE_RESERVE
);
587 resx
->zone_offset
= base_off
;
588 if (layer2
->bytes_free
== HAMMER_BIGBLOCK_SIZE
)
589 resx
->flags
|= HAMMER_RESF_LAYER2FREE
;
590 resv
= RB_INSERT(hammer_res_rb_tree
, &hmp
->rb_resv_root
, resx
);
591 KKASSERT(resv
== NULL
);
593 ++hammer_count_reservations
;
595 resv
->append_off
= offset
+ bytes
;
598 * If we are not reserving a whole buffer but are at the start of
599 * a new block, call hammer_bnew() to avoid a disk read.
601 * If we are reserving a whole buffer (or more), the caller will
602 * probably use a direct read, so do nothing.
604 * If we do not have a whole lot of system memory we really can't
605 * afford to block while holding the blkmap_lock!
607 if (bytes
< HAMMER_BUFSIZE
&& (next_offset
& HAMMER_BUFMASK
) == 0) {
608 if (!vm_paging_min_dnc(HAMMER_BUFSIZE
/ PAGE_SIZE
)) {
609 hammer_bnew(hmp
, next_offset
, errorp
, &buffer3
);
615 blockmap
->next_offset
= next_offset
+ bytes
;
616 hammer_unlock(&hmp
->blkmap_lock
);
620 hammer_rel_buffer(buffer1
, 0);
622 hammer_rel_buffer(buffer2
, 0);
624 hammer_rel_buffer(buffer3
, 0);
625 hammer_rel_volume(root_volume
, 0);
626 *zone_offp
= next_offset
;
632 * Dereference a reservation structure. Upon the final release the
633 * underlying big-block is checked and if it is entirely free we delete
634 * any related HAMMER buffers to avoid potential conflicts with future
635 * reuse of the big-block.
638 hammer_blockmap_reserve_complete(hammer_mount_t hmp
, hammer_reserve_t resv
)
640 hammer_off_t base_offset
;
643 KKASSERT(resv
->refs
> 0);
644 KKASSERT(hammer_is_zone_raw_buffer(resv
->zone_offset
));
647 * Setting append_off to the max prevents any new allocations
648 * from occuring while we are trying to dispose of the reservation,
649 * allowing us to safely delete any related HAMMER buffers.
651 * If we are unable to clean out all related HAMMER buffers we
654 if (resv
->refs
== 1 && (resv
->flags
& HAMMER_RESF_LAYER2FREE
)) {
655 resv
->append_off
= HAMMER_BIGBLOCK_SIZE
;
656 base_offset
= hammer_xlate_to_zoneX(resv
->zone
, resv
->zone_offset
);
657 error
= hammer_del_buffers(hmp
, base_offset
,
659 HAMMER_BIGBLOCK_SIZE
,
661 if (hammer_debug_general
& 0x20000) {
662 hkprintf("delbgblk %016jx error %d\n",
663 (intmax_t)base_offset
, error
);
666 hammer_reserve_setdelay(hmp
, resv
);
668 if (--resv
->refs
== 0) {
669 if (hammer_debug_general
& 0x20000) {
670 hkprintf("delresvr %016jx zone %02x\n",
671 (intmax_t)resv
->zone_offset
, resv
->zone
);
673 KKASSERT((resv
->flags
& HAMMER_RESF_ONDELAY
) == 0);
674 RB_REMOVE(hammer_res_rb_tree
, &hmp
->rb_resv_root
, resv
);
675 kfree(resv
, hmp
->m_misc
);
676 --hammer_count_reservations
;
681 * Prevent a potentially free big-block from being reused until after
682 * the related flushes have completely cycled, otherwise crash recovery
683 * could resurrect a data block that was already reused and overwritten.
685 * The caller might reset the underlying layer2 entry's append_off to 0, so
686 * our covering append_off must be set to max to prevent any reallocation
687 * until after the flush delays complete, not to mention proper invalidation
688 * of any underlying cached blocks.
691 hammer_reserve_setdelay_offset(hammer_mount_t hmp
, hammer_off_t base_offset
,
692 int zone
, hammer_blockmap_layer2_t layer2
)
694 hammer_reserve_t resv
;
697 * Allocate the reservation if necessary.
699 * NOTE: need lock in future around resv lookup/allocation and
700 * the setdelay call, currently refs is not bumped until the call.
703 resv
= RB_LOOKUP(hammer_res_rb_tree
, &hmp
->rb_resv_root
, base_offset
);
705 resv
= kmalloc(sizeof(*resv
), hmp
->m_misc
,
706 M_WAITOK
| M_ZERO
| M_USE_RESERVE
);
708 resv
->zone_offset
= base_offset
;
710 resv
->append_off
= HAMMER_BIGBLOCK_SIZE
;
712 if (layer2
->bytes_free
== HAMMER_BIGBLOCK_SIZE
)
713 resv
->flags
|= HAMMER_RESF_LAYER2FREE
;
714 if (RB_INSERT(hammer_res_rb_tree
, &hmp
->rb_resv_root
, resv
)) {
715 kfree(resv
, hmp
->m_misc
);
718 ++hammer_count_reservations
;
720 if (layer2
->bytes_free
== HAMMER_BIGBLOCK_SIZE
)
721 resv
->flags
|= HAMMER_RESF_LAYER2FREE
;
723 hammer_reserve_setdelay(hmp
, resv
);
727 * Enter the reservation on the on-delay list, or move it if it
728 * is already on the list.
731 hammer_reserve_setdelay(hammer_mount_t hmp
, hammer_reserve_t resv
)
733 if (resv
->flags
& HAMMER_RESF_ONDELAY
) {
734 TAILQ_REMOVE(&hmp
->delay_list
, resv
, delay_entry
);
735 resv
->flg_no
= hmp
->flusher
.next
+ 1;
736 TAILQ_INSERT_TAIL(&hmp
->delay_list
, resv
, delay_entry
);
739 ++hmp
->rsv_fromdelay
;
740 resv
->flags
|= HAMMER_RESF_ONDELAY
;
741 resv
->flg_no
= hmp
->flusher
.next
+ 1;
742 TAILQ_INSERT_TAIL(&hmp
->delay_list
, resv
, delay_entry
);
747 * Reserve has reached its flush point, remove it from the delay list
748 * and finish it off. hammer_blockmap_reserve_complete() inherits
749 * the ondelay reference.
752 hammer_reserve_clrdelay(hammer_mount_t hmp
, hammer_reserve_t resv
)
754 KKASSERT(resv
->flags
& HAMMER_RESF_ONDELAY
);
755 resv
->flags
&= ~HAMMER_RESF_ONDELAY
;
756 TAILQ_REMOVE(&hmp
->delay_list
, resv
, delay_entry
);
757 --hmp
->rsv_fromdelay
;
758 hammer_blockmap_reserve_complete(hmp
, resv
);
762 * Backend function - free (offset, bytes) in a zone.
767 hammer_blockmap_free(hammer_transaction_t trans
,
768 hammer_off_t zone_offset
, int bytes
)
771 hammer_volume_t root_volume
;
772 hammer_blockmap_t freemap
;
773 hammer_blockmap_layer1_t layer1
;
774 hammer_blockmap_layer2_t layer2
;
775 hammer_buffer_t buffer1
= NULL
;
776 hammer_buffer_t buffer2
= NULL
;
777 hammer_off_t layer1_offset
;
778 hammer_off_t layer2_offset
;
779 hammer_off_t base_off
;
790 bytes
= HAMMER_DATA_DOALIGN(bytes
);
791 KKASSERT(bytes
<= HAMMER_XBUFSIZE
);
792 KKASSERT(((zone_offset
^ (zone_offset
+ (bytes
- 1))) &
793 ~HAMMER_BIGBLOCK_MASK64
) == 0);
796 * Basic zone validation & locking
798 zone
= HAMMER_ZONE_DECODE(zone_offset
);
799 KKASSERT(hammer_is_index_record(zone
));
800 root_volume
= trans
->rootvol
;
803 freemap
= &hmp
->blockmap
[HAMMER_ZONE_FREEMAP_INDEX
];
808 layer1_offset
= freemap
->phys_offset
+
809 HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset
);
810 layer1
= hammer_bread(hmp
, layer1_offset
, &error
, &buffer1
);
813 KKASSERT(layer1
->phys_offset
&&
814 layer1
->phys_offset
!= HAMMER_BLOCKMAP_UNAVAIL
);
815 if (!hammer_crc_test_layer1(hmp
->version
, layer1
)) {
816 hammer_lock_ex(&hmp
->blkmap_lock
);
817 if (!hammer_crc_test_layer1(hmp
->version
, layer1
))
818 hpanic("CRC FAILED: LAYER1");
819 hammer_unlock(&hmp
->blkmap_lock
);
823 * Dive layer 2, each entry represents a big-block.
825 layer2_offset
= layer1
->phys_offset
+
826 HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset
);
827 layer2
= hammer_bread(hmp
, layer2_offset
, &error
, &buffer2
);
830 if (!hammer_crc_test_layer2(hmp
->version
, layer2
)) {
831 hammer_lock_ex(&hmp
->blkmap_lock
);
832 if (!hammer_crc_test_layer2(hmp
->version
, layer2
))
833 hpanic("CRC FAILED: LAYER2");
834 hammer_unlock(&hmp
->blkmap_lock
);
837 hammer_lock_ex(&hmp
->blkmap_lock
);
839 hammer_modify_buffer(trans
, buffer2
, layer2
, sizeof(*layer2
));
842 * Free space previously allocated via blockmap_alloc().
844 * NOTE: bytes_free can be and remain negative due to de-dup ops
845 * but can never become larger than HAMMER_BIGBLOCK_SIZE.
847 KKASSERT(layer2
->zone
== zone
);
848 layer2
->bytes_free
+= bytes
;
849 KKASSERT(layer2
->bytes_free
<= HAMMER_BIGBLOCK_SIZE
);
852 * If a big-block becomes entirely free we must create a covering
853 * reservation to prevent premature reuse. Note, however, that
854 * the big-block and/or reservation may still have an append_off
855 * that allows further (non-reused) allocations.
857 * Once the reservation has been made we re-check layer2 and if
858 * the big-block is still entirely free we reset the layer2 entry.
859 * The reservation will prevent premature reuse.
861 * NOTE: hammer_buffer's are only invalidated when the reservation
862 * is completed, if the layer2 entry is still completely free at
863 * that time. Any allocations from the reservation that may have
864 * occured in the mean time, or active references on the reservation
865 * from new pending allocations, will prevent the invalidation from
868 if (layer2
->bytes_free
== HAMMER_BIGBLOCK_SIZE
) {
869 base_off
= hammer_xlate_to_zone2(zone_offset
&
870 ~HAMMER_BIGBLOCK_MASK64
);
872 hammer_reserve_setdelay_offset(hmp
, base_off
, zone
, layer2
);
873 if (layer2
->bytes_free
== HAMMER_BIGBLOCK_SIZE
) {
875 layer2
->append_off
= 0;
876 hammer_modify_buffer(trans
, buffer1
,
877 layer1
, sizeof(*layer1
));
878 ++layer1
->blocks_free
;
879 hammer_crc_set_layer1(hmp
->version
, layer1
);
880 hammer_modify_buffer_done(buffer1
);
881 hammer_modify_volume_field(trans
,
883 vol0_stat_freebigblocks
);
884 ++root_volume
->ondisk
->vol0_stat_freebigblocks
;
885 hmp
->copy_stat_freebigblocks
=
886 root_volume
->ondisk
->vol0_stat_freebigblocks
;
887 hammer_modify_volume_done(trans
->rootvol
);
890 hammer_crc_set_layer2(hmp
->version
, layer2
);
891 hammer_modify_buffer_done(buffer2
);
892 hammer_unlock(&hmp
->blkmap_lock
);
896 hammer_rel_buffer(buffer1
, 0);
898 hammer_rel_buffer(buffer2
, 0);
902 hammer_blockmap_dedup(hammer_transaction_t trans
,
903 hammer_off_t zone_offset
, int bytes
)
906 hammer_blockmap_t freemap
;
907 hammer_blockmap_layer1_t layer1
;
908 hammer_blockmap_layer2_t layer2
;
909 hammer_buffer_t buffer1
= NULL
;
910 hammer_buffer_t buffer2
= NULL
;
911 hammer_off_t layer1_offset
;
912 hammer_off_t layer2_offset
;
924 bytes
= HAMMER_DATA_DOALIGN(bytes
);
925 KKASSERT(bytes
<= HAMMER_BIGBLOCK_SIZE
);
926 KKASSERT(((zone_offset
^ (zone_offset
+ (bytes
- 1))) &
927 ~HAMMER_BIGBLOCK_MASK64
) == 0);
930 * Basic zone validation & locking
932 zone
= HAMMER_ZONE_DECODE(zone_offset
);
933 KKASSERT(hammer_is_index_record(zone
));
936 freemap
= &hmp
->blockmap
[HAMMER_ZONE_FREEMAP_INDEX
];
941 layer1_offset
= freemap
->phys_offset
+
942 HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset
);
943 layer1
= hammer_bread(hmp
, layer1_offset
, &error
, &buffer1
);
946 KKASSERT(layer1
->phys_offset
&&
947 layer1
->phys_offset
!= HAMMER_BLOCKMAP_UNAVAIL
);
948 if (!hammer_crc_test_layer1(hmp
->version
, layer1
)) {
949 hammer_lock_ex(&hmp
->blkmap_lock
);
950 if (!hammer_crc_test_layer1(hmp
->version
, layer1
))
951 hpanic("CRC FAILED: LAYER1");
952 hammer_unlock(&hmp
->blkmap_lock
);
956 * Dive layer 2, each entry represents a big-block.
958 layer2_offset
= layer1
->phys_offset
+
959 HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset
);
960 layer2
= hammer_bread(hmp
, layer2_offset
, &error
, &buffer2
);
963 if (!hammer_crc_test_layer2(hmp
->version
, layer2
)) {
964 hammer_lock_ex(&hmp
->blkmap_lock
);
965 if (!hammer_crc_test_layer2(hmp
->version
, layer2
))
966 hpanic("CRC FAILED: LAYER2");
967 hammer_unlock(&hmp
->blkmap_lock
);
970 hammer_lock_ex(&hmp
->blkmap_lock
);
972 hammer_modify_buffer(trans
, buffer2
, layer2
, sizeof(*layer2
));
975 * Free space previously allocated via blockmap_alloc().
977 * NOTE: bytes_free can be and remain negative due to de-dup ops
978 * but can never become larger than HAMMER_BIGBLOCK_SIZE.
980 KKASSERT(layer2
->zone
== zone
);
981 temp
= layer2
->bytes_free
- HAMMER_BIGBLOCK_SIZE
* 2;
982 cpu_ccfence(); /* prevent gcc from optimizing temp out */
983 if (temp
> layer2
->bytes_free
) {
987 layer2
->bytes_free
-= bytes
;
989 KKASSERT(layer2
->bytes_free
<= HAMMER_BIGBLOCK_SIZE
);
991 hammer_crc_set_layer2(hmp
->version
, layer2
);
993 hammer_modify_buffer_done(buffer2
);
994 hammer_unlock(&hmp
->blkmap_lock
);
998 hammer_rel_buffer(buffer1
, 0);
1000 hammer_rel_buffer(buffer2
, 0);
1005 * Backend function - finalize (offset, bytes) in a zone.
1007 * Allocate space that was previously reserved by the frontend.
1010 hammer_blockmap_finalize(hammer_transaction_t trans
,
1011 hammer_reserve_t resv
,
1012 hammer_off_t zone_offset
, int bytes
)
1015 hammer_volume_t root_volume
;
1016 hammer_blockmap_t freemap
;
1017 hammer_blockmap_layer1_t layer1
;
1018 hammer_blockmap_layer2_t layer2
;
1019 hammer_buffer_t buffer1
= NULL
;
1020 hammer_buffer_t buffer2
= NULL
;
1021 hammer_off_t layer1_offset
;
1022 hammer_off_t layer2_offset
;
1034 bytes
= HAMMER_DATA_DOALIGN(bytes
);
1035 KKASSERT(bytes
<= HAMMER_XBUFSIZE
);
1038 * Basic zone validation & locking
1040 zone
= HAMMER_ZONE_DECODE(zone_offset
);
1041 KKASSERT(hammer_is_index_record(zone
));
1042 root_volume
= trans
->rootvol
;
1045 freemap
= &hmp
->blockmap
[HAMMER_ZONE_FREEMAP_INDEX
];
1050 layer1_offset
= freemap
->phys_offset
+
1051 HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset
);
1052 layer1
= hammer_bread(hmp
, layer1_offset
, &error
, &buffer1
);
1055 KKASSERT(layer1
->phys_offset
&&
1056 layer1
->phys_offset
!= HAMMER_BLOCKMAP_UNAVAIL
);
1057 if (!hammer_crc_test_layer1(hmp
->version
, layer1
)) {
1058 hammer_lock_ex(&hmp
->blkmap_lock
);
1059 if (!hammer_crc_test_layer1(hmp
->version
, layer1
))
1060 hpanic("CRC FAILED: LAYER1");
1061 hammer_unlock(&hmp
->blkmap_lock
);
1065 * Dive layer 2, each entry represents a big-block.
1067 layer2_offset
= layer1
->phys_offset
+
1068 HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset
);
1069 layer2
= hammer_bread(hmp
, layer2_offset
, &error
, &buffer2
);
1072 if (!hammer_crc_test_layer2(hmp
->version
, layer2
)) {
1073 hammer_lock_ex(&hmp
->blkmap_lock
);
1074 if (!hammer_crc_test_layer2(hmp
->version
, layer2
))
1075 hpanic("CRC FAILED: LAYER2");
1076 hammer_unlock(&hmp
->blkmap_lock
);
1079 hammer_lock_ex(&hmp
->blkmap_lock
);
1081 hammer_modify_buffer(trans
, buffer2
, layer2
, sizeof(*layer2
));
1084 * Finalize some or all of the space covered by a current
1085 * reservation. An allocation in the same layer may have
1086 * already assigned ownership.
1088 if (layer2
->zone
== 0) {
1089 hammer_modify_buffer(trans
, buffer1
, layer1
, sizeof(*layer1
));
1090 --layer1
->blocks_free
;
1091 hammer_crc_set_layer1(hmp
->version
, layer1
);
1092 hammer_modify_buffer_done(buffer1
);
1093 layer2
->zone
= zone
;
1094 KKASSERT(layer2
->bytes_free
== HAMMER_BIGBLOCK_SIZE
);
1095 KKASSERT(layer2
->append_off
== 0);
1096 hammer_modify_volume_field(trans
,
1098 vol0_stat_freebigblocks
);
1099 --root_volume
->ondisk
->vol0_stat_freebigblocks
;
1100 hmp
->copy_stat_freebigblocks
=
1101 root_volume
->ondisk
->vol0_stat_freebigblocks
;
1102 hammer_modify_volume_done(trans
->rootvol
);
1104 if (layer2
->zone
!= zone
)
1105 hdkprintf("layer2 zone mismatch %d %d\n", layer2
->zone
, zone
);
1106 KKASSERT(layer2
->zone
== zone
);
1107 KKASSERT(bytes
!= 0);
1108 layer2
->bytes_free
-= bytes
;
1110 resv
->flags
&= ~HAMMER_RESF_LAYER2FREE
;
1113 * Finalizations can occur out of order, or combined with allocations.
1114 * append_off must be set to the highest allocated offset.
1116 offset
= ((int)zone_offset
& HAMMER_BIGBLOCK_MASK
) + bytes
;
1117 if (layer2
->append_off
< offset
)
1118 layer2
->append_off
= offset
;
1120 hammer_crc_set_layer2(hmp
->version
, layer2
);
1121 hammer_modify_buffer_done(buffer2
);
1122 hammer_unlock(&hmp
->blkmap_lock
);
1126 hammer_rel_buffer(buffer1
, 0);
1128 hammer_rel_buffer(buffer2
, 0);
1133 * Return the approximate number of free bytes in the big-block
1134 * containing the specified blockmap offset.
1136 * WARNING: A negative number can be returned if data de-dup exists,
1137 * and the result will also not represent he actual number
1138 * of free bytes in this case.
1140 * This code is used only by the reblocker.
/*
 * Return the approximate number of free bytes in the big-block
 * containing zone_offset.  Per the preceding file comment this is
 * used only by the reblocker and the result can be negative when
 * de-dup exists.
 *
 * hmp:         mount to query
 * zone_offset: zone-encoded blockmap offset identifying the big-block
 * curp:        set to 0 when blockmap->next_offset and zone_offset are
 *              not in the same big-block
 * errorp:      receives I/O errors from the volume/buffer reads
 *
 * NOTE(review): some interior error-handling lines are not visible in
 * this extract; comments below describe only the code shown.
 */
1143 hammer_blockmap_getfree(hammer_mount_t hmp
, hammer_off_t zone_offset
,
1144 int *curp
, int *errorp
)
1146 hammer_volume_t root_volume
;
1147 hammer_blockmap_t blockmap
;
1148 hammer_blockmap_t freemap
;
1149 hammer_blockmap_layer1_t layer1
;
1150 hammer_blockmap_layer2_t layer2
;
1151 hammer_buffer_t buffer
= NULL
;
1152 hammer_off_t layer1_offset
;
1153 hammer_off_t layer2_offset
;
/* Decode the zone index; only record zones are legal here. */
1157 zone
= HAMMER_ZONE_DECODE(zone_offset
);
1158 KKASSERT(hammer_is_index_record(zone
));
/* Reference the root volume; errors are reported via *errorp. */
1159 root_volume
= hammer_get_root_volume(hmp
, errorp
);
/* Per-zone blockmap plus the freemap tracking physical big-blocks. */
1164 blockmap
= &hmp
->blockmap
[zone
];
1165 freemap
= &hmp
->blockmap
[HAMMER_ZONE_FREEMAP_INDEX
];
/*
 * Dive layer 1 to find the layer2 page covering zone_offset.
 */
1170 layer1_offset
= freemap
->phys_offset
+
1171 HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset
);
1172 layer1
= hammer_bread(hmp
, layer1_offset
, errorp
, &buffer
);
1178 KKASSERT(layer1
->phys_offset
);
/* On CRC failure, re-test under the blockmap lock before panicking. */
1179 if (!hammer_crc_test_layer1(hmp
->version
, layer1
)) {
1180 hammer_lock_ex(&hmp
->blkmap_lock
);
1181 if (!hammer_crc_test_layer1(hmp
->version
, layer1
))
1182 hpanic("CRC FAILED: LAYER1");
1183 hammer_unlock(&hmp
->blkmap_lock
);
1187 * Dive layer 2, each entry represents a big-block.
1189 * (reuse buffer, layer1 pointer becomes invalid)
1191 layer2_offset
= layer1
->phys_offset
+
1192 HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset
);
1193 layer2
= hammer_bread(hmp
, layer2_offset
, errorp
, &buffer
);
/* Same CRC re-test-under-lock pattern for the layer2 entry. */
1199 if (!hammer_crc_test_layer2(hmp
->version
, layer2
)) {
1200 hammer_lock_ex(&hmp
->blkmap_lock
);
1201 if (!hammer_crc_test_layer2(hmp
->version
, layer2
))
1202 hpanic("CRC FAILED: LAYER2");
1203 hammer_unlock(&hmp
->blkmap_lock
);
1205 KKASSERT(layer2
->zone
== zone
);
/* Snapshot the (possibly negative, see WARNING above) free bytes. */
1207 bytes
= layer2
->bytes_free
;
1210 * *curp becomes 1 only when no error and,
1211 * next_offset and zone_offset are in the same big-block.
1213 if ((blockmap
->next_offset
^ zone_offset
) & ~HAMMER_BIGBLOCK_MASK64
)
1214 *curp
= 0; /* not same */
/* Release the buffer and root volume references. */
1219 hammer_rel_buffer(buffer
, 0);
1220 hammer_rel_volume(root_volume
, 0);
/* Optional debug trace of the query and its result. */
1221 if (hammer_debug_general
& 0x4000) {
1222 hdkprintf("%016jx -> %d\n", (intmax_t)zone_offset
, bytes
);
1229 * Lookup a blockmap offset and verify blockmap layers.
/*
 * Look up a zone-X blockmap offset, verifying the freemap layer1 and
 * layer2 structures (and their CRCs) along the way, and return the
 * translated zone-2 offset.
 *
 * NOTE(review): the tail of the parameter list and several
 * error-handling lines are not visible in this extract; comments
 * below describe only the code shown.
 */
1232 hammer_blockmap_lookup_verify(hammer_mount_t hmp
, hammer_off_t zone_offset
,
1235 hammer_volume_t root_volume
;
1236 hammer_blockmap_t freemap
;
1237 hammer_blockmap_layer1_t layer1
;
1238 hammer_blockmap_layer2_t layer2
;
1239 hammer_buffer_t buffer
= NULL
;
1240 hammer_off_t layer1_offset
;
1241 hammer_off_t layer2_offset
;
1242 hammer_off_t result_offset
;
1243 hammer_off_t base_off
;
1244 hammer_reserve_t resv __debugvar
;
1248 * Calculate the zone-2 offset.
1250 zone
= HAMMER_ZONE_DECODE(zone_offset
);
1251 result_offset
= hammer_xlate_to_zone2(zone_offset
);
1254 * Validate the allocation zone
1256 root_volume
= hammer_get_root_volume(hmp
, errorp
);
/* The freemap must be initialized before any lookup is possible. */
1259 freemap
= &hmp
->blockmap
[HAMMER_ZONE_FREEMAP_INDEX
];
1260 KKASSERT(freemap
->phys_offset
!= 0);
/*
 * Dive layer 1 to locate the layer2 page covering zone_offset.
 */
1265 layer1_offset
= freemap
->phys_offset
+
1266 HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset
);
1267 layer1
= hammer_bread(hmp
, layer1_offset
, errorp
, &buffer
);
1270 KKASSERT(layer1
->phys_offset
!= HAMMER_BLOCKMAP_UNAVAIL
);
/* On CRC failure, re-test under the blockmap lock before panicking. */
1271 if (!hammer_crc_test_layer1(hmp
->version
, layer1
)) {
1272 hammer_lock_ex(&hmp
->blkmap_lock
);
1273 if (!hammer_crc_test_layer1(hmp
->version
, layer1
))
1274 hpanic("CRC FAILED: LAYER1");
1275 hammer_unlock(&hmp
->blkmap_lock
);
1279 * Dive layer 2, each entry represents a big-block.
1281 layer2_offset
= layer1
->phys_offset
+
1282 HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset
);
1283 layer2
= hammer_bread(hmp
, layer2_offset
, errorp
, &buffer
);
/*
 * A zeroed layer2 zone is only legal while a reservation still
 * covers this big-block; assert a matching reservation exists.
 */
1287 if (layer2
->zone
== 0) {
1288 base_off
= hammer_xlate_to_zone2(zone_offset
&
1289 ~HAMMER_BIGBLOCK_MASK64
);
1290 resv
= RB_LOOKUP(hammer_res_rb_tree
, &hmp
->rb_resv_root
,
1292 KKASSERT(resv
&& resv
->zone
== zone
);
/* Any other zone mismatch indicates blockmap corruption. */
1294 } else if (layer2
->zone
!= zone
) {
1295 hpanic("bad zone %d/%d", layer2
->zone
, zone
);
/* Same CRC re-test-under-lock pattern for the layer2 entry. */
1297 if (!hammer_crc_test_layer2(hmp
->version
, layer2
)) {
1298 hammer_lock_ex(&hmp
->blkmap_lock
);
1299 if (!hammer_crc_test_layer2(hmp
->version
, layer2
))
1300 hpanic("CRC FAILED: LAYER2");
1301 hammer_unlock(&hmp
->blkmap_lock
);
/* Release references and optionally trace the translation. */
1306 hammer_rel_buffer(buffer
, 0);
1307 hammer_rel_volume(root_volume
, 0);
1308 if (hammer_debug_general
& 0x0800) {
1309 hdkprintf("%016jx -> %016jx\n",
1310 (intmax_t)zone_offset
, (intmax_t)result_offset
);
1312 return(result_offset
);
1317 * Check space availability
1319 * MPSAFE - does not require fs_token
/*
 * Estimate whether the filesystem has enough free big-blocks to cover
 * all currently reserved space plus 'slop' big-blocks of headroom.
 * Per the preceding file comment this path is MPSAFE and does not
 * require fs_token.
 *
 * hmp:  mount to check
 * slop: extra headroom, expressed in big-blocks
 * resp: NOTE(review): presumably receives a computed residual/usage
 *       value; the lines that write it are not visible in this
 *       extract - verify against the full source.
 */
1322 _hammer_checkspace(hammer_mount_t hmp
, int slop
, int64_t *resp
)
/* Worst-case on-media cost of one inode (inode data + B-Tree elm). */
1324 const int in_size
= sizeof(struct hammer_inode_data
) +
1325 sizeof(union hammer_btree_elm
);
/* Worst-case on-media cost of one record (two B-Tree elements). */
1326 const int rec_size
= (sizeof(union hammer_btree_elm
) * 2);
/*
 * Sum outstanding reservations in bytes: inodes, records, raw data,
 * delayed-reuse big-blocks, the dirty-buffer limit, and the caller's
 * slop (big-block counts scaled via HAMMER_BIGBLOCK_BITS).
 */
1329 usedbytes
= hmp
->rsv_inodes
* in_size
+
1330 hmp
->rsv_recs
* rec_size
+
1331 hmp
->rsv_databytes
+
1332 ((int64_t)hmp
->rsv_fromdelay
<< HAMMER_BIGBLOCK_BITS
) +
1333 ((int64_t)hammer_limit_dirtybufspace
) +
1334 (slop
<< HAMMER_BIGBLOCK_BITS
);
/* Space is sufficient when free big-blocks cover the used estimate. */
1339 if (hmp
->copy_stat_freebigblocks
>=
1340 (usedbytes
>> HAMMER_BIGBLOCK_BITS
)) {
/*
 * Check the freemap layer1 entry covering *offsetp; when the current
 * volume has no more physically available layer1 space, advance
 * *offsetp to the next volume via hammer_skip_volume().
 *
 * NOTE(review): the declaration/initialization of 'error' and the
 * function's return are not visible in this extract - confirm error
 * propagation against the full source.
 */
1348 hammer_check_volume(hammer_mount_t hmp
, hammer_off_t
*offsetp
)
1350 hammer_blockmap_t freemap
;
1351 hammer_blockmap_layer1_t layer1
;
1352 hammer_buffer_t buffer1
= NULL
;
1353 hammer_off_t layer1_offset
;
/* The freemap blockmap tracks physical big-block availability. */
1356 freemap
= &hmp
->blockmap
[HAMMER_ZONE_FREEMAP_INDEX
];
/* Locate the layer1 entry covering *offsetp. */
1358 layer1_offset
= freemap
->phys_offset
+
1359 HAMMER_BLOCKMAP_LAYER1_OFFSET(*offsetp
);
1360 layer1
= hammer_bread(hmp
, layer1_offset
, &error
, &buffer1
);
1365 * No more physically available space in layer1s
1366 * of the current volume, go to the next volume.
1368 if (layer1
->phys_offset
== HAMMER_BLOCKMAP_UNAVAIL
)
1369 hammer_skip_volume(offsetp
);
/* Drop the buffer reference acquired by hammer_bread(). */
1372 hammer_rel_buffer(buffer1
, 0);
1377 hammer_skip_volume(hammer_off_t
*offsetp
)
1379 hammer_off_t offset
;
1383 zone
= HAMMER_ZONE_DECODE(offset
);
1384 vol_no
= HAMMER_VOL_DECODE(offset
) + 1;
1385 KKASSERT(vol_no
<= HAMMER_MAX_VOLUMES
);
1387 if (vol_no
== HAMMER_MAX_VOLUMES
) { /* wrap */
1392 *offsetp
= HAMMER_ENCODE(zone
, vol_no
, 0);