HAMMER - Stabilize and refactor volume removal
[dragonfly.git] / sys / vfs / hammer / hammer_blockmap.c
blob 89b5dce34c38c526aac92b7639f90f2876286583
1 /*
2 * Copyright (c) 2008 The DragonFly Project. All rights reserved.
3 *
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
34 * $DragonFly: src/sys/vfs/hammer/hammer_blockmap.c,v 1.27 2008/07/31 22:30:33 dillon Exp $
38 * HAMMER blockmap
40 #include "hammer.h"
42 static int hammer_res_rb_compare(hammer_reserve_t res1, hammer_reserve_t res2);
43 static void hammer_reserve_setdelay_offset(hammer_mount_t hmp,
44 hammer_off_t base_offset, int zone,
45 struct hammer_blockmap_layer2 *layer2);
46 static void hammer_reserve_setdelay(hammer_mount_t hmp, hammer_reserve_t resv);
49 * Reserved big-blocks red-black tree support
51 RB_GENERATE2(hammer_res_rb_tree, hammer_reserve, rb_node,
52 hammer_res_rb_compare, hammer_off_t, zone_offset);
54 static int
55 hammer_res_rb_compare(hammer_reserve_t res1, hammer_reserve_t res2)
57 if (res1->zone_offset < res2->zone_offset)
58 return(-1);
59 if (res1->zone_offset > res2->zone_offset)
60 return(1);
61 return(0);
65 * Allocate bytes from a zone
67 hammer_off_t
68 hammer_blockmap_alloc(hammer_transaction_t trans, int zone, int bytes,
69 hammer_off_t hint, int *errorp)
71 hammer_mount_t hmp;
72 hammer_volume_t root_volume;
73 hammer_blockmap_t blockmap;
74 hammer_blockmap_t freemap;
75 hammer_reserve_t resv;
76 struct hammer_blockmap_layer1 *layer1;
77 struct hammer_blockmap_layer2 *layer2;
78 hammer_buffer_t buffer1 = NULL;
79 hammer_buffer_t buffer2 = NULL;
80 hammer_buffer_t buffer3 = NULL;
81 hammer_off_t tmp_offset;
82 hammer_off_t next_offset;
83 hammer_off_t result_offset;
84 hammer_off_t layer1_offset;
85 hammer_off_t layer2_offset;
86 hammer_off_t base_off;
87 int loops = 0;
88 int offset; /* offset within big-block */
89 int use_hint;
91 hmp = trans->hmp;
94 * Deal with alignment and buffer-boundary issues.
96 * Be careful, certain primary alignments are used below to allocate
97 * new blockmap blocks.
99 bytes = (bytes + 15) & ~15;
100 KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);
101 KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
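	/*
	 * Example: a 100 byte request is rounded up to (100 + 15) & ~15 = 112
	 * bytes; all blockmap allocations are made in 16-byte quanta.
	 */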
104 * Setup
106 root_volume = trans->rootvol;
107 *errorp = 0;
108 blockmap = &hmp->blockmap[zone];
109 freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
110 KKASSERT(HAMMER_ZONE_DECODE(blockmap->next_offset) == zone);
113 * Use the hint if we have one.
115 if (hint && HAMMER_ZONE_DECODE(hint) == zone) {
116 next_offset = (hint + 15) & ~(hammer_off_t)15;
117 use_hint = 1;
118 } else {
119 next_offset = blockmap->next_offset;
120 use_hint = 0;
122 again:
125 * use_hint is turned off if we leave the hinted big-block.
127 if (use_hint && ((next_offset ^ hint) & ~HAMMER_HINTBLOCK_MASK64)) {
128 next_offset = blockmap->next_offset;
129 use_hint = 0;
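	/*
	 * The test above works because (next_offset ^ hint) has bits set
	 * only where the two offsets differ, so masking off the low
	 * HAMMER_HINTBLOCK bits leaves a non-zero value exactly when the
	 * iterator has left the hinted block.  The same xor-and-mask idiom
	 * is used for the buffer and big-block boundary checks below.
	 */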
133 * Check for wrap
135 if (next_offset == HAMMER_ZONE_ENCODE(zone + 1, 0)) {
136 if (++loops == 2) {
137 result_offset = 0;
138 *errorp = ENOSPC;
139 goto failed;
141 next_offset = HAMMER_ZONE_ENCODE(zone, 0);
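	/*
	 * The scan makes at most two passes: reaching the end of the zone
	 * the first time restarts the iterator at the zone base, and
	 * reaching it a second time (loops == 2) means the zone is out
	 * of space.
	 */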
145 * The allocation request may not cross a buffer boundary. Special
146 * large allocations must not cross a large-block boundary.
148 tmp_offset = next_offset + bytes - 1;
149 if (bytes <= HAMMER_BUFSIZE) {
150 if ((next_offset ^ tmp_offset) & ~HAMMER_BUFMASK64) {
151 next_offset = tmp_offset & ~HAMMER_BUFMASK64;
152 goto again;
154 } else {
155 if ((next_offset ^ tmp_offset) & ~HAMMER_LARGEBLOCK_MASK64) {
156 next_offset = tmp_offset & ~HAMMER_LARGEBLOCK_MASK64;
157 goto again;
160 offset = (int)next_offset & HAMMER_LARGEBLOCK_MASK;
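	/*
	 * tmp_offset addresses the last byte of the request.  If it falls
	 * in a different buffer (or, for oversized requests, a different
	 * big-block) than next_offset, the request would straddle the
	 * boundary, so next_offset is advanced to the start of the next
	 * buffer or big-block and the scan retries.  "offset" is the byte
	 * offset of next_offset within its big-block.
	 */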
163 * Dive layer 1.
165 layer1_offset = freemap->phys_offset +
166 HAMMER_BLOCKMAP_LAYER1_OFFSET(next_offset);
168 layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
169 if (*errorp) {
170 result_offset = 0;
171 goto failed;
175 * Check CRC.
177 if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
178 hammer_lock_ex(&hmp->blkmap_lock);
179 if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
180 panic("CRC FAILED: LAYER1");
181 hammer_unlock(&hmp->blkmap_lock);
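	/*
	 * The CRC is first tested without the blockmap lock.  A mismatch
	 * may simply mean another thread is modifying the entry, so the
	 * test is repeated while holding blkmap_lock; only a mismatch that
	 * persists under the lock indicates real corruption.
	 */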
185 * If we are at a big-block boundary and layer1 indicates no
186 * free big-blocks, we cannot allocate a new big-block in
187 * layer2; skip to the next layer1 entry.
189 if (offset == 0 && layer1->blocks_free == 0) {
190 next_offset = (next_offset + HAMMER_BLOCKMAP_LAYER2) &
191 ~HAMMER_BLOCKMAP_LAYER2_MASK;
192 goto again;
194 KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
197 * Skip this layer1 entry if it is pointing to a layer2 big-block
198 * on a volume that we are currently trying to remove from the
199 * file-system. This is used by the volume-del code together with
200 * the reblocker to free up a volume.
202 if ((int)HAMMER_VOL_DECODE(layer1->phys_offset) ==
203 hmp->volume_to_remove) {
204 next_offset = (next_offset + HAMMER_BLOCKMAP_LAYER2) &
205 ~HAMMER_BLOCKMAP_LAYER2_MASK;
206 goto again;
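	/*
	 * hmp->volume_to_remove holds the volume number being removed (a
	 * negative value when no removal is in progress), and
	 * HAMMER_VOL_DECODE() extracts the volume number from the layer2
	 * big-block's physical offset, so this test simply refuses to place
	 * new allocations on the departing volume.
	 */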
210 * Dive layer 2, each entry represents a large-block.
212 layer2_offset = layer1->phys_offset +
213 HAMMER_BLOCKMAP_LAYER2_OFFSET(next_offset);
214 layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
215 if (*errorp) {
216 result_offset = 0;
217 goto failed;
221 * Check CRC. This check can race another thread that holds the lock
222 * and is in the middle of modifying layer2.
224 if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
225 hammer_lock_ex(&hmp->blkmap_lock);
226 if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
227 panic("CRC FAILED: LAYER2");
228 hammer_unlock(&hmp->blkmap_lock);
232 * Skip the layer if the zone is owned by someone other than us.
234 if (layer2->zone && layer2->zone != zone) {
235 next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
236 goto again;
238 if (offset < layer2->append_off) {
239 next_offset += layer2->append_off - offset;
240 goto again;
244 * If operating in the current non-hint blockmap block, do not
245 * allow it to get over-full. Also drop any active hinting so
246 * blockmap->next_offset is updated at the end.
248 * We do this for B-Tree and meta-data allocations to provide
249 * localization for updates.
251 if ((zone == HAMMER_ZONE_BTREE_INDEX ||
252 zone == HAMMER_ZONE_META_INDEX) &&
253 offset >= HAMMER_LARGEBLOCK_OVERFILL &&
254 !((next_offset ^ blockmap->next_offset) & ~HAMMER_LARGEBLOCK_MASK64)
256 if (offset >= HAMMER_LARGEBLOCK_OVERFILL) {
257 next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
258 use_hint = 0;
259 goto again;
264 * We need the lock from this point on. We have to re-check zone
265 * ownership after acquiring the lock and also check for reservations.
267 hammer_lock_ex(&hmp->blkmap_lock);
269 if (layer2->zone && layer2->zone != zone) {
270 hammer_unlock(&hmp->blkmap_lock);
271 next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
272 goto again;
274 if (offset < layer2->append_off) {
275 hammer_unlock(&hmp->blkmap_lock);
276 next_offset += layer2->append_off - offset;
277 goto again;
281 * The bigblock might be reserved by another zone. If it is reserved
282 * by our zone we may have to move next_offset past the append_off.
284 base_off = (next_offset &
285 (~HAMMER_LARGEBLOCK_MASK64 & ~HAMMER_OFF_ZONE_MASK)) |
286 HAMMER_ZONE_RAW_BUFFER;
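	/*
	 * base_off is the big-block's base address re-expressed as a
	 * zone-2 (raw buffer) offset: the intra-big-block bits and the
	 * zone field are masked off and HAMMER_ZONE_RAW_BUFFER is
	 * substituted.  Reservations are keyed on this address in the
	 * red-black tree.
	 */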
287 resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
288 if (resv) {
289 if (resv->zone != zone) {
290 hammer_unlock(&hmp->blkmap_lock);
291 next_offset = (next_offset + HAMMER_LARGEBLOCK_SIZE) &
292 ~HAMMER_LARGEBLOCK_MASK64;
293 goto again;
295 if (offset < resv->append_off) {
296 hammer_unlock(&hmp->blkmap_lock);
297 next_offset += resv->append_off - offset;
298 goto again;
300 ++resv->refs;
304 * Ok, we can allocate out of this layer2 big-block. Assume ownership
305 * of the layer for real. At this point we've validated any
306 * reservation that might exist and can just ignore resv.
308 if (layer2->zone == 0) {
310 * Assign the bigblock to our zone
312 hammer_modify_buffer(trans, buffer1,
313 layer1, sizeof(*layer1));
314 --layer1->blocks_free;
315 layer1->layer1_crc = crc32(layer1,
316 HAMMER_LAYER1_CRCSIZE);
317 hammer_modify_buffer_done(buffer1);
318 hammer_modify_buffer(trans, buffer2,
319 layer2, sizeof(*layer2));
320 layer2->zone = zone;
321 KKASSERT(layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE);
322 KKASSERT(layer2->append_off == 0);
323 hammer_modify_volume_field(trans, trans->rootvol,
324 vol0_stat_freebigblocks);
325 --root_volume->ondisk->vol0_stat_freebigblocks;
326 hmp->copy_stat_freebigblocks =
327 root_volume->ondisk->vol0_stat_freebigblocks;
328 hammer_modify_volume_done(trans->rootvol);
329 } else {
330 hammer_modify_buffer(trans, buffer2,
331 layer2, sizeof(*layer2));
333 KKASSERT(layer2->zone == zone);
335 layer2->bytes_free -= bytes;
336 KKASSERT(layer2->append_off <= offset);
337 layer2->append_off = offset + bytes;
338 layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
339 hammer_modify_buffer_done(buffer2);
340 KKASSERT(layer2->bytes_free >= 0);
343 * We hold the blockmap lock and should be the only ones
344 * capable of modifying resv->append_off. Track the allocation
345 * as appropriate.
347 KKASSERT(bytes != 0);
348 if (resv) {
349 KKASSERT(resv->append_off <= offset);
350 resv->append_off = offset + bytes;
351 resv->flags &= ~HAMMER_RESF_LAYER2FREE;
352 hammer_blockmap_reserve_complete(hmp, resv);
356 * If we are allocating from the base of a new buffer we can avoid
357 * a disk read by calling hammer_bnew().
359 if ((next_offset & HAMMER_BUFMASK) == 0) {
360 hammer_bnew_ext(trans->hmp, next_offset, bytes,
361 errorp, &buffer3);
363 result_offset = next_offset;
366 * If we weren't supplied with a hint or could not use the hint
367 * then we wound up using blockmap->next_offset as the hint and
368 * need to save it.
370 if (use_hint == 0) {
371 hammer_modify_volume(NULL, root_volume, NULL, 0);
372 blockmap->next_offset = next_offset + bytes;
373 hammer_modify_volume_done(root_volume);
375 hammer_unlock(&hmp->blkmap_lock);
376 failed:
379 * Cleanup
381 if (buffer1)
382 hammer_rel_buffer(buffer1, 0);
383 if (buffer2)
384 hammer_rel_buffer(buffer2, 0);
385 if (buffer3)
386 hammer_rel_buffer(buffer3, 0);
388 return(result_offset);
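/*
 * Illustrative caller sketch (identifiers not defined in this file are
 * assumed): allocating space for a B-Tree node out of the B-Tree zone
 * within an open transaction.
 *
 *	hammer_off_t node_offset;
 *	int error;
 *
 *	node_offset = hammer_blockmap_alloc(trans, HAMMER_ZONE_BTREE_INDEX,
 *					    sizeof(struct hammer_node_ondisk),
 *					    hint, &error);
 *	if (error)
 *		return(error);
 *	(node_offset is now a zone-encoded blockmap address for the
 *	 requested number of bytes)
 */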
392 * Frontend function - Reserve bytes in a zone.
394 * This code reserves bytes out of a blockmap without committing to any
395 * meta-data modifications, allowing the front-end to directly issue disk
396 * write I/O for large blocks of data.
398 * The backend later finalizes the reservation with hammer_blockmap_finalize()
399 * upon committing the related record.
401 hammer_reserve_t
402 hammer_blockmap_reserve(hammer_mount_t hmp, int zone, int bytes,
403 hammer_off_t *zone_offp, int *errorp)
405 hammer_volume_t root_volume;
406 hammer_blockmap_t blockmap;
407 hammer_blockmap_t freemap;
408 struct hammer_blockmap_layer1 *layer1;
409 struct hammer_blockmap_layer2 *layer2;
410 hammer_buffer_t buffer1 = NULL;
411 hammer_buffer_t buffer2 = NULL;
412 hammer_buffer_t buffer3 = NULL;
413 hammer_off_t tmp_offset;
414 hammer_off_t next_offset;
415 hammer_off_t layer1_offset;
416 hammer_off_t layer2_offset;
417 hammer_off_t base_off;
418 hammer_reserve_t resv;
419 hammer_reserve_t resx;
420 int loops = 0;
421 int offset;
424 * Setup
426 KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
427 root_volume = hammer_get_root_volume(hmp, errorp);
428 if (*errorp)
429 return(NULL);
430 blockmap = &hmp->blockmap[zone];
431 freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
432 KKASSERT(HAMMER_ZONE_DECODE(blockmap->next_offset) == zone);
435 * Deal with alignment and buffer-boundary issues.
437 * Be careful, certain primary alignments are used below to allocate
438 * new blockmap blocks.
440 bytes = (bytes + 15) & ~15;
441 KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);
443 next_offset = blockmap->next_offset;
444 again:
445 resv = NULL;
447 * Check for wrap
449 if (next_offset == HAMMER_ZONE_ENCODE(zone + 1, 0)) {
450 if (++loops == 2) {
451 *errorp = ENOSPC;
452 goto failed;
454 next_offset = HAMMER_ZONE_ENCODE(zone, 0);
458 * The allocation request may not cross a buffer boundary. Special
459 * large allocations must not cross a large-block boundary.
461 tmp_offset = next_offset + bytes - 1;
462 if (bytes <= HAMMER_BUFSIZE) {
463 if ((next_offset ^ tmp_offset) & ~HAMMER_BUFMASK64) {
464 next_offset = tmp_offset & ~HAMMER_BUFMASK64;
465 goto again;
467 } else {
468 if ((next_offset ^ tmp_offset) & ~HAMMER_LARGEBLOCK_MASK64) {
469 next_offset = tmp_offset & ~HAMMER_LARGEBLOCK_MASK64;
470 goto again;
473 offset = (int)next_offset & HAMMER_LARGEBLOCK_MASK;
476 * Dive layer 1.
478 layer1_offset = freemap->phys_offset +
479 HAMMER_BLOCKMAP_LAYER1_OFFSET(next_offset);
480 layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
481 if (*errorp)
482 goto failed;
485 * Check CRC.
487 if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
488 hammer_lock_ex(&hmp->blkmap_lock);
489 if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
490 panic("CRC FAILED: LAYER1");
491 hammer_unlock(&hmp->blkmap_lock);
495 * If we are at a big-block boundary and layer1 indicates no
496 * free big-blocks, we cannot allocate a new big-block in
497 * layer2; skip to the next layer1 entry.
499 if ((next_offset & HAMMER_LARGEBLOCK_MASK) == 0 &&
500 layer1->blocks_free == 0) {
501 next_offset = (next_offset + HAMMER_BLOCKMAP_LAYER2) &
502 ~HAMMER_BLOCKMAP_LAYER2_MASK;
503 goto again;
505 KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
508 * Dive layer 2, each entry represents a large-block.
510 layer2_offset = layer1->phys_offset +
511 HAMMER_BLOCKMAP_LAYER2_OFFSET(next_offset);
512 layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
513 if (*errorp)
514 goto failed;
517 * Check CRC if not allocating into uninitialized space (which we
518 * aren't when reserving space).
520 if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
521 hammer_lock_ex(&hmp->blkmap_lock);
522 if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
523 panic("CRC FAILED: LAYER2");
524 hammer_unlock(&hmp->blkmap_lock);
528 * Skip the layer if the zone is owned by someone other than us.
530 if (layer2->zone && layer2->zone != zone) {
531 next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
532 goto again;
534 if (offset < layer2->append_off) {
535 next_offset += layer2->append_off - offset;
536 goto again;
540 * We need the lock from this point on. We have to re-check zone
541 * ownership after acquiring the lock and also check for reservations.
543 hammer_lock_ex(&hmp->blkmap_lock);
545 if (layer2->zone && layer2->zone != zone) {
546 hammer_unlock(&hmp->blkmap_lock);
547 next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
548 goto again;
550 if (offset < layer2->append_off) {
551 hammer_unlock(&hmp->blkmap_lock);
552 next_offset += layer2->append_off - offset;
553 goto again;
557 * The bigblock might be reserved by another zone. If it is reserved
558 * by our zone we may have to move next_offset past the append_off.
560 base_off = (next_offset &
561 (~HAMMER_LARGEBLOCK_MASK64 & ~HAMMER_OFF_ZONE_MASK)) |
562 HAMMER_ZONE_RAW_BUFFER;
563 resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
564 if (resv) {
565 if (resv->zone != zone) {
566 hammer_unlock(&hmp->blkmap_lock);
567 next_offset = (next_offset + HAMMER_LARGEBLOCK_SIZE) &
568 ~HAMMER_LARGEBLOCK_MASK64;
569 goto again;
571 if (offset < resv->append_off) {
572 hammer_unlock(&hmp->blkmap_lock);
573 next_offset += resv->append_off - offset;
574 goto again;
576 ++resv->refs;
577 resx = NULL;
578 } else {
579 resx = kmalloc(sizeof(*resv), hmp->m_misc,
580 M_WAITOK | M_ZERO | M_USE_RESERVE);
581 resx->refs = 1;
582 resx->zone = zone;
583 resx->zone_offset = base_off;
584 if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE)
585 resx->flags |= HAMMER_RESF_LAYER2FREE;
586 resv = RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resx);
587 KKASSERT(resv == NULL);
588 resv = resx;
589 ++hammer_count_reservations;
591 resv->append_off = offset + bytes;
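	/*
	 * Only the in-memory reservation records the new append_off here;
	 * the on-disk layer1/layer2 entries are not modified.  The space
	 * becomes permanent only when the backend finalizes it with
	 * hammer_blockmap_finalize().
	 */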
594 * If we are not reserving a whole buffer but are at the start of
595 * a new block, call hammer_bnew() to avoid a disk read.
597 * If we are reserving a whole buffer (or more), the caller will
598 * probably use a direct read, so do nothing.
600 if (bytes < HAMMER_BUFSIZE && (next_offset & HAMMER_BUFMASK) == 0) {
601 hammer_bnew(hmp, next_offset, errorp, &buffer3);
605 * Adjust our iterator and alloc_offset. The layer1 and layer2
606 * space beyond alloc_offset is uninitialized. alloc_offset must
607 * be big-block aligned.
609 blockmap->next_offset = next_offset + bytes;
610 hammer_unlock(&hmp->blkmap_lock);
612 failed:
613 if (buffer1)
614 hammer_rel_buffer(buffer1, 0);
615 if (buffer2)
616 hammer_rel_buffer(buffer2, 0);
617 if (buffer3)
618 hammer_rel_buffer(buffer3, 0);
619 hammer_rel_volume(root_volume, 0);
620 *zone_offp = next_offset;
622 return(resv);
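/*
 * Illustrative frontend/backend pairing (a sketch; the surrounding context
 * is assumed): the frontend reserves space and issues direct I/O against
 * the returned zone offset, and the backend later finalizes that space and
 * releases the reservation when the related record is committed.
 *
 *	resv = hammer_blockmap_reserve(hmp, zone, bytes, &zone_off, &error);
 *	if (resv) {
 *		... issue direct write I/O against zone_off ...
 *		error = hammer_blockmap_finalize(trans, resv, zone_off, bytes);
 *		hammer_blockmap_reserve_complete(hmp, resv);
 *	}
 */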
626 * Dereference a reservation structure. Upon the final release the
627 * underlying big-block is checked and if it is entirely free we delete
628 * any related HAMMER buffers to avoid potential conflicts with future
629 * reuse of the big-block.
631 void
632 hammer_blockmap_reserve_complete(hammer_mount_t hmp, hammer_reserve_t resv)
634 hammer_off_t base_offset;
635 int error;
637 KKASSERT(resv->refs > 0);
638 KKASSERT((resv->zone_offset & HAMMER_OFF_ZONE_MASK) ==
639 HAMMER_ZONE_RAW_BUFFER);
642 * Setting append_off to the max prevents any new allocations
643 * from occurring while we are trying to dispose of the reservation,
644 * allowing us to safely delete any related HAMMER buffers.
646 * If we are unable to clean out all related HAMMER buffers we
647 * requeue the delay.
649 if (resv->refs == 1 && (resv->flags & HAMMER_RESF_LAYER2FREE)) {
650 resv->append_off = HAMMER_LARGEBLOCK_SIZE;
651 base_offset = resv->zone_offset & ~HAMMER_OFF_ZONE_MASK;
652 base_offset = HAMMER_ZONE_ENCODE(resv->zone, base_offset);
653 error = hammer_del_buffers(hmp, base_offset,
654 resv->zone_offset,
655 HAMMER_LARGEBLOCK_SIZE,
657 if (error)
658 hammer_reserve_setdelay(hmp, resv);
660 if (--resv->refs == 0) {
661 KKASSERT((resv->flags & HAMMER_RESF_ONDELAY) == 0);
662 RB_REMOVE(hammer_res_rb_tree, &hmp->rb_resv_root, resv);
663 kfree(resv, hmp->m_misc);
664 --hammer_count_reservations;
669 * Prevent a potentially free big-block from being reused until after
670 * the related flushes have completely cycled; otherwise crash recovery
671 * could resurrect a data block that was already reused and overwritten.
673 * The caller might reset the underlying layer2 entry's append_off to 0, so
674 * our covering append_off must be set to max to prevent any reallocation
675 * until after the flush delays complete, not to mention proper invalidation
676 * of any underlying cached blocks.
678 static void
679 hammer_reserve_setdelay_offset(hammer_mount_t hmp, hammer_off_t base_offset,
680 int zone, struct hammer_blockmap_layer2 *layer2)
682 hammer_reserve_t resv;
685 * Allocate the reservation if necessary.
687 * NOTE: need lock in future around resv lookup/allocation and
688 * the setdelay call; currently refs is not bumped until the call.
690 again:
691 resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_offset);
692 if (resv == NULL) {
693 resv = kmalloc(sizeof(*resv), hmp->m_misc,
694 M_WAITOK | M_ZERO | M_USE_RESERVE);
695 resv->zone = zone;
696 resv->zone_offset = base_offset;
697 resv->refs = 0;
698 resv->append_off = HAMMER_LARGEBLOCK_SIZE;
700 if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE)
701 resv->flags |= HAMMER_RESF_LAYER2FREE;
702 if (RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resv)) {
703 kfree(resv, hmp->m_misc);
704 goto again;
706 ++hammer_count_reservations;
707 } else {
708 if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE)
709 resv->flags |= HAMMER_RESF_LAYER2FREE;
711 hammer_reserve_setdelay(hmp, resv);
715 * Enter the reservation on the on-delay list, or move it if it
716 * is already on the list.
718 static void
719 hammer_reserve_setdelay(hammer_mount_t hmp, hammer_reserve_t resv)
721 if (resv->flags & HAMMER_RESF_ONDELAY) {
722 TAILQ_REMOVE(&hmp->delay_list, resv, delay_entry);
723 resv->flush_group = hmp->flusher.next + 1;
724 TAILQ_INSERT_TAIL(&hmp->delay_list, resv, delay_entry);
725 } else {
726 ++resv->refs;
727 ++hmp->rsv_fromdelay;
728 resv->flags |= HAMMER_RESF_ONDELAY;
729 resv->flush_group = hmp->flusher.next + 1;
730 TAILQ_INSERT_TAIL(&hmp->delay_list, resv, delay_entry);
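	/*
	 * Tagging the reservation with the next flush group keeps the
	 * big-block off limits until that group has flushed, so crash
	 * recovery cannot observe a freed block that was reused too early
	 * (see the comment above hammer_reserve_setdelay_offset()).
	 */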
734 void
735 hammer_reserve_clrdelay(hammer_mount_t hmp, hammer_reserve_t resv)
737 KKASSERT(resv->flags & HAMMER_RESF_ONDELAY);
738 resv->flags &= ~HAMMER_RESF_ONDELAY;
739 TAILQ_REMOVE(&hmp->delay_list, resv, delay_entry);
740 --hmp->rsv_fromdelay;
741 hammer_blockmap_reserve_complete(hmp, resv);
745 * Backend function - free (offset, bytes) in a zone.
747 * XXX error return
749 void
750 hammer_blockmap_free(hammer_transaction_t trans,
751 hammer_off_t zone_offset, int bytes)
753 hammer_mount_t hmp;
754 hammer_volume_t root_volume;
755 hammer_blockmap_t blockmap;
756 hammer_blockmap_t freemap;
757 struct hammer_blockmap_layer1 *layer1;
758 struct hammer_blockmap_layer2 *layer2;
759 hammer_buffer_t buffer1 = NULL;
760 hammer_buffer_t buffer2 = NULL;
761 hammer_off_t layer1_offset;
762 hammer_off_t layer2_offset;
763 hammer_off_t base_off;
764 int error;
765 int zone;
767 if (bytes == 0)
768 return;
769 hmp = trans->hmp;
772 * Alignment
774 bytes = (bytes + 15) & ~15;
775 KKASSERT(bytes <= HAMMER_XBUFSIZE);
776 KKASSERT(((zone_offset ^ (zone_offset + (bytes - 1))) &
777 ~HAMMER_LARGEBLOCK_MASK64) == 0);
780 * Basic zone validation & locking
782 zone = HAMMER_ZONE_DECODE(zone_offset);
783 KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
784 root_volume = trans->rootvol;
785 error = 0;
787 blockmap = &hmp->blockmap[zone];
788 freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
791 * Dive layer 1.
793 layer1_offset = freemap->phys_offset +
794 HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
795 layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
796 if (error)
797 goto failed;
798 KKASSERT(layer1->phys_offset &&
799 layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
800 if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
801 hammer_lock_ex(&hmp->blkmap_lock);
802 if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
803 panic("CRC FAILED: LAYER1");
804 hammer_unlock(&hmp->blkmap_lock);
808 * Dive layer 2, each entry represents a large-block.
810 layer2_offset = layer1->phys_offset +
811 HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
812 layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
813 if (error)
814 goto failed;
815 if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
816 hammer_lock_ex(&hmp->blkmap_lock);
817 if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
818 panic("CRC FAILED: LAYER2");
819 hammer_unlock(&hmp->blkmap_lock);
822 hammer_lock_ex(&hmp->blkmap_lock);
824 hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
827 * Free space previously allocated via blockmap_alloc().
829 KKASSERT(layer2->zone == zone);
830 layer2->bytes_free += bytes;
831 KKASSERT(layer2->bytes_free <= HAMMER_LARGEBLOCK_SIZE);
834 * If a big-block becomes entirely free we must create a covering
835 * reservation to prevent premature reuse. Note, however, that
836 * the big-block and/or reservation may still have an append_off
837 * that allows further (non-reused) allocations.
839 * Once the reservation has been made we re-check layer2 and if
840 * the big-block is still entirely free we reset the layer2 entry.
841 * The reservation will prevent premature reuse.
843 * NOTE: hammer_buffer's are only invalidated when the reservation
844 * is completed, if the layer2 entry is still completely free at
845 * that time. Any allocations from the reservation that may have
846 * occurred in the meantime, or active references on the reservation
847 * from new pending allocations, will prevent the invalidation from
848 * occurring.
850 if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE) {
851 base_off = (zone_offset & (~HAMMER_LARGEBLOCK_MASK64 & ~HAMMER_OFF_ZONE_MASK)) | HAMMER_ZONE_RAW_BUFFER;
853 hammer_reserve_setdelay_offset(hmp, base_off, zone, layer2);
854 if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE) {
855 layer2->zone = 0;
856 layer2->append_off = 0;
857 hammer_modify_buffer(trans, buffer1,
858 layer1, sizeof(*layer1));
859 ++layer1->blocks_free;
860 layer1->layer1_crc = crc32(layer1,
861 HAMMER_LAYER1_CRCSIZE);
862 hammer_modify_buffer_done(buffer1);
863 hammer_modify_volume_field(trans,
864 trans->rootvol,
865 vol0_stat_freebigblocks);
866 ++root_volume->ondisk->vol0_stat_freebigblocks;
867 hmp->copy_stat_freebigblocks =
868 root_volume->ondisk->vol0_stat_freebigblocks;
869 hammer_modify_volume_done(trans->rootvol);
872 layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
873 hammer_modify_buffer_done(buffer2);
874 hammer_unlock(&hmp->blkmap_lock);
876 failed:
877 if (buffer1)
878 hammer_rel_buffer(buffer1, 0);
879 if (buffer2)
880 hammer_rel_buffer(buffer2, 0);
884 * Backend function - finalize (offset, bytes) in a zone.
886 * Allocate space that was previously reserved by the frontend.
889 hammer_blockmap_finalize(hammer_transaction_t trans,
890 hammer_reserve_t resv,
891 hammer_off_t zone_offset, int bytes)
893 hammer_mount_t hmp;
894 hammer_volume_t root_volume;
895 hammer_blockmap_t blockmap;
896 hammer_blockmap_t freemap;
897 struct hammer_blockmap_layer1 *layer1;
898 struct hammer_blockmap_layer2 *layer2;
899 hammer_buffer_t buffer1 = NULL;
900 hammer_buffer_t buffer2 = NULL;
901 hammer_off_t layer1_offset;
902 hammer_off_t layer2_offset;
903 int error;
904 int zone;
905 int offset;
907 if (bytes == 0)
908 return(0);
909 hmp = trans->hmp;
912 * Alignment
914 bytes = (bytes + 15) & ~15;
915 KKASSERT(bytes <= HAMMER_XBUFSIZE);
918 * Basic zone validation & locking
920 zone = HAMMER_ZONE_DECODE(zone_offset);
921 KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
922 root_volume = trans->rootvol;
923 error = 0;
925 blockmap = &hmp->blockmap[zone];
926 freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
929 * Dive layer 1.
931 layer1_offset = freemap->phys_offset +
932 HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
933 layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
934 if (error)
935 goto failed;
936 KKASSERT(layer1->phys_offset &&
937 layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
938 if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
939 hammer_lock_ex(&hmp->blkmap_lock);
940 if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
941 panic("CRC FAILED: LAYER1");
942 hammer_unlock(&hmp->blkmap_lock);
946 * Dive layer 2, each entry represents a large-block.
948 layer2_offset = layer1->phys_offset +
949 HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
950 layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
951 if (error)
952 goto failed;
953 if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
954 hammer_lock_ex(&hmp->blkmap_lock);
955 if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
956 panic("CRC FAILED: LAYER2");
957 hammer_unlock(&hmp->blkmap_lock);
960 hammer_lock_ex(&hmp->blkmap_lock);
962 hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
965 * Finalize some or all of the space covered by a current
966 * reservation. An allocation in the same layer may have
967 * already assigned ownership.
969 if (layer2->zone == 0) {
970 hammer_modify_buffer(trans, buffer1,
971 layer1, sizeof(*layer1));
972 --layer1->blocks_free;
973 layer1->layer1_crc = crc32(layer1,
974 HAMMER_LAYER1_CRCSIZE);
975 hammer_modify_buffer_done(buffer1);
976 layer2->zone = zone;
977 KKASSERT(layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE);
978 KKASSERT(layer2->append_off == 0);
979 hammer_modify_volume_field(trans,
980 trans->rootvol,
981 vol0_stat_freebigblocks);
982 --root_volume->ondisk->vol0_stat_freebigblocks;
983 hmp->copy_stat_freebigblocks =
984 root_volume->ondisk->vol0_stat_freebigblocks;
985 hammer_modify_volume_done(trans->rootvol);
987 if (layer2->zone != zone)
988 kprintf("layer2 zone mismatch %d %d\n", layer2->zone, zone);
989 KKASSERT(layer2->zone == zone);
990 KKASSERT(bytes != 0);
991 layer2->bytes_free -= bytes;
992 if (resv)
993 resv->flags &= ~HAMMER_RESF_LAYER2FREE;
996 * Finalizations can occur out of order, or combined with allocations.
997 * append_off must be set to the highest allocated offset.
999 offset = ((int)zone_offset & HAMMER_LARGEBLOCK_MASK) + bytes;
1000 if (layer2->append_off < offset)
1001 layer2->append_off = offset;
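	/*
	 * Example: if a 1K record at big-block offset 0 and a 2K record at
	 * offset 1024 are finalized in reverse order, the first
	 * finalization sets append_off to 3072 and the later one for the
	 * lower record leaves it alone, so append_off always ends up at
	 * the highest allocated byte.
	 */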
1003 layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
1004 hammer_modify_buffer_done(buffer2);
1005 hammer_unlock(&hmp->blkmap_lock);
1007 failed:
1008 if (buffer1)
1009 hammer_rel_buffer(buffer1, 0);
1010 if (buffer2)
1011 hammer_rel_buffer(buffer2, 0);
1012 return(error);
1016 * Return the number of free bytes in the big-block containing the
1017 * specified blockmap offset.
1020 hammer_blockmap_getfree(hammer_mount_t hmp, hammer_off_t zone_offset,
1021 int *curp, int *errorp)
1023 hammer_volume_t root_volume;
1024 hammer_blockmap_t blockmap;
1025 hammer_blockmap_t freemap;
1026 struct hammer_blockmap_layer1 *layer1;
1027 struct hammer_blockmap_layer2 *layer2;
1028 hammer_buffer_t buffer = NULL;
1029 hammer_off_t layer1_offset;
1030 hammer_off_t layer2_offset;
1031 int bytes;
1032 int zone;
1034 zone = HAMMER_ZONE_DECODE(zone_offset);
1035 KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
1036 root_volume = hammer_get_root_volume(hmp, errorp);
1037 if (*errorp) {
1038 *curp = 0;
1039 return(0);
1041 blockmap = &hmp->blockmap[zone];
1042 freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
1045 * Dive layer 1.
1047 layer1_offset = freemap->phys_offset +
1048 HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
1049 layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer);
1050 if (*errorp) {
1051 bytes = 0;
1052 goto failed;
1054 KKASSERT(layer1->phys_offset);
1055 if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
1056 hammer_lock_ex(&hmp->blkmap_lock);
1057 if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
1058 panic("CRC FAILED: LAYER1");
1059 hammer_unlock(&hmp->blkmap_lock);
1063 * Dive layer 2, each entry represents a large-block.
1065 * (reuse buffer, layer1 pointer becomes invalid)
1067 layer2_offset = layer1->phys_offset +
1068 HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
1069 layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer);
1070 if (*errorp) {
1071 bytes = 0;
1072 goto failed;
1074 if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
1075 hammer_lock_ex(&hmp->blkmap_lock);
1076 if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
1077 panic("CRC FAILED: LAYER2");
1078 hammer_unlock(&hmp->blkmap_lock);
1080 KKASSERT(layer2->zone == zone);
1082 bytes = layer2->bytes_free;
1084 if ((blockmap->next_offset ^ zone_offset) & ~HAMMER_LARGEBLOCK_MASK64)
1085 *curp = 0;
1086 else
1087 *curp = 1;
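	/*
	 * *curp reports whether the queried big-block is the one the
	 * zone's next_offset iterator currently points into, i.e. whether
	 * new allocations are still being appended to it.
	 */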
1088 failed:
1089 if (buffer)
1090 hammer_rel_buffer(buffer, 0);
1091 hammer_rel_volume(root_volume, 0);
1092 if (hammer_debug_general & 0x0800) {
1093 kprintf("hammer_blockmap_getfree: %016llx -> %d\n",
1094 (long long)zone_offset, bytes);
1096 return(bytes);
1101 * Lookup a blockmap offset.
1103 hammer_off_t
1104 hammer_blockmap_lookup(hammer_mount_t hmp, hammer_off_t zone_offset,
1105 int *errorp)
1107 hammer_volume_t root_volume;
1108 hammer_blockmap_t freemap;
1109 struct hammer_blockmap_layer1 *layer1;
1110 struct hammer_blockmap_layer2 *layer2;
1111 hammer_buffer_t buffer = NULL;
1112 hammer_off_t layer1_offset;
1113 hammer_off_t layer2_offset;
1114 hammer_off_t result_offset;
1115 hammer_off_t base_off;
1116 hammer_reserve_t resv;
1117 int zone;
1120 * Calculate the zone-2 offset.
1122 zone = HAMMER_ZONE_DECODE(zone_offset);
1123 KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
1125 result_offset = (zone_offset & ~HAMMER_OFF_ZONE_MASK) |
1126 HAMMER_ZONE_RAW_BUFFER;
1129 * We can actually stop here; normal blockmaps are now direct-mapped
1130 * onto the freemap and so represent zone-2 addresses.
1132 if (hammer_verify_zone == 0) {
1133 *errorp = 0;
1134 return(result_offset);
1138 * Validate the allocation zone
1140 root_volume = hammer_get_root_volume(hmp, errorp);
1141 if (*errorp)
1142 return(0);
1143 freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
1144 KKASSERT(freemap->phys_offset != 0);
1147 * Dive layer 1.
1149 layer1_offset = freemap->phys_offset +
1150 HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
1151 layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer);
1152 if (*errorp)
1153 goto failed;
1154 KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
1155 if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
1156 hammer_lock_ex(&hmp->blkmap_lock);
1157 if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
1158 panic("CRC FAILED: LAYER1");
1159 hammer_unlock(&hmp->blkmap_lock);
1163 * Dive layer 2, each entry represents a large-block.
1165 layer2_offset = layer1->phys_offset +
1166 HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
1167 layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer);
1169 if (*errorp)
1170 goto failed;
1171 if (layer2->zone == 0) {
1172 base_off = (zone_offset & (~HAMMER_LARGEBLOCK_MASK64 & ~HAMMER_OFF_ZONE_MASK)) | HAMMER_ZONE_RAW_BUFFER;
1173 resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root,
1174 base_off);
1175 KKASSERT(resv && resv->zone == zone);
1177 } else if (layer2->zone != zone) {
1178 panic("hammer_blockmap_lookup: bad zone %d/%d\n",
1179 layer2->zone, zone);
1181 if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
1182 hammer_lock_ex(&hmp->blkmap_lock);
1183 if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
1184 panic("CRC FAILED: LAYER2");
1185 hammer_unlock(&hmp->blkmap_lock);
1188 failed:
1189 if (buffer)
1190 hammer_rel_buffer(buffer, 0);
1191 hammer_rel_volume(root_volume, 0);
1192 if (hammer_debug_general & 0x0800) {
1193 kprintf("hammer_blockmap_lookup: %016llx -> %016llx\n",
1194 (long long)zone_offset, (long long)result_offset);
1196 return(result_offset);
1201 * Check space availability
1204 _hammer_checkspace(hammer_mount_t hmp, int slop, int64_t *resp)
1206 const int in_size = sizeof(struct hammer_inode_data) +
1207 sizeof(union hammer_btree_elm);
1208 const int rec_size = (sizeof(union hammer_btree_elm) * 2);
1209 int64_t usedbytes;
1211 usedbytes = hmp->rsv_inodes * in_size +
1212 hmp->rsv_recs * rec_size +
1213 hmp->rsv_databytes +
1214 ((int64_t)hmp->rsv_fromdelay << HAMMER_LARGEBLOCK_BITS) +
1215 ((int64_t)hidirtybufspace << 2) +
1216 (slop << HAMMER_LARGEBLOCK_BITS);
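	/*
	 * usedbytes is a pessimistic estimate of space already spoken for:
	 * reserved inodes and records, reserved data bytes, big-blocks
	 * parked on the reservation delay list, four times the dirty
	 * buffer limit (hidirtybufspace), and the caller-supplied slop
	 * (expressed in big-blocks).  The mount is considered to have
	 * space if the free big-block count covers that estimate.
	 */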
1218 hammer_count_extra_space_used = usedbytes; /* debugging */
1219 if (resp)
1220 *resp = usedbytes;
1222 if (hmp->copy_stat_freebigblocks >=
1223 (usedbytes >> HAMMER_LARGEBLOCK_BITS)) {
1224 return(0);
1226 return (ENOSPC);