HAMMER - Implement experimental volume removal
[dragonfly.git] / sys / vfs / hammer / hammer_volume.c
blob ff6b64fbf788c4885c4ed56625ffedc4b53363a7
/*
 * Copyright (c) 2009 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com> and
 * Michael Neumann <mneumann@ntecs.de>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "hammer.h"
#include <sys/fcntl.h>
#include <sys/nlookup.h>
#include <sys/buf.h>

static int
hammer_setup_device(struct vnode **devvpp, const char *dev_path, int ronly);

static void
hammer_close_device(struct vnode **devvpp, int ronly);

static int
hammer_format_volume_header(struct hammer_mount *hmp, struct vnode *devvp,
	const char *vol_name, int vol_no, int vol_count,
	int64_t vol_size, int64_t boot_area_size, int64_t mem_area_size);

static uint64_t
hammer_format_freemap(hammer_transaction_t trans, hammer_volume_t volume);

static int
hammer_free_freemap(hammer_transaction_t trans, hammer_volume_t volume);
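
/*
 * Add a volume to the filesystem: format a fresh volume header on the
 * device, install the volume, bump vol_count on every existing volume,
 * format the new volume's freemap and credit the gained big-blocks to
 * the root volume's statistics.
 */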
int
hammer_ioc_volume_add(hammer_transaction_t trans, hammer_inode_t ip,
		struct hammer_ioc_volume *ioc)
{
	struct hammer_mount *hmp = trans->hmp;
	struct mount *mp = hmp->mp;
	hammer_volume_t volume;
	int error;

	if (mp->mnt_flag & MNT_RDONLY) {
		kprintf("Cannot add volume to read-only HAMMER filesystem\n");
		return (EINVAL);
	}

	if (hmp->nvolumes + 1 >= HAMMER_MAX_VOLUMES) {
		kprintf("Max number of HAMMER volumes exceeded\n");
		return (EINVAL);
	}

	/*
	 * Find an unused volume number.
	 */
	int free_vol_no = 0;
	while (free_vol_no < HAMMER_MAX_VOLUMES &&
	       RB_LOOKUP(hammer_vol_rb_tree, &hmp->rb_vols_root, free_vol_no)) {
		++free_vol_no;
	}
	if (free_vol_no >= HAMMER_MAX_VOLUMES) {
		kprintf("Max number of HAMMER volumes exceeded\n");
		return (EINVAL);
	}

	struct vnode *devvp = NULL;
	error = hammer_setup_device(&devvp, ioc->device_name, 0);
	if (error)
		goto end;
	KKASSERT(devvp);
	error = hammer_format_volume_header(
		hmp,
		devvp,
		hmp->rootvol->ondisk->vol_name,
		free_vol_no,
		hmp->nvolumes + 1,
		ioc->vol_size,
		ioc->boot_area_size,
		ioc->mem_area_size);
	hammer_close_device(&devvp, 0);
	if (error)
		goto end;
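
	/*
	 * Install the freshly formatted volume into the in-memory volume
	 * set of this mount.
	 */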
	error = hammer_install_volume(hmp, ioc->device_name, NULL);
	if (error)
		goto end;

	hammer_sync_lock_sh(trans);
	hammer_lock_ex(&hmp->blkmap_lock);

	++hmp->nvolumes;

	/*
	 * Set each volume's new value of the vol_count field.
	 */
	for (int vol_no = 0; vol_no < HAMMER_MAX_VOLUMES; ++vol_no) {
		if (vol_no == free_vol_no)
			continue;

		volume = hammer_get_volume(hmp, vol_no, &error);
		if (volume == NULL && error == ENOENT) {
			/*
			 * Skip unused volume numbers
			 */
			error = 0;
			continue;
		}
		KKASSERT(volume != NULL && error == 0);
		hammer_modify_volume_field(trans, volume, vol_count);
		volume->ondisk->vol_count = hmp->nvolumes;
		hammer_modify_volume_done(volume);
		hammer_rel_volume(volume, 0);
	}

	volume = hammer_get_volume(hmp, free_vol_no, &error);
	KKASSERT(volume != NULL && error == 0);

	uint64_t total_free_bigblocks =
		hammer_format_freemap(trans, volume);

	/*
	 * Increase the total number of bigblocks
	 */
	hammer_modify_volume_field(trans, trans->rootvol,
		vol0_stat_bigblocks);
	trans->rootvol->ondisk->vol0_stat_bigblocks += total_free_bigblocks;
	hammer_modify_volume_done(trans->rootvol);

	/*
	 * Increase the number of free bigblocks
	 * (including the copy in hmp)
	 */
	hammer_modify_volume_field(trans, trans->rootvol,
		vol0_stat_freebigblocks);
	trans->rootvol->ondisk->vol0_stat_freebigblocks += total_free_bigblocks;
	hmp->copy_stat_freebigblocks =
		trans->rootvol->ondisk->vol0_stat_freebigblocks;
	hammer_modify_volume_done(trans->rootvol);

	hammer_rel_volume(volume, 0);

	hammer_unlock(&hmp->blkmap_lock);
	hammer_sync_unlock(trans);

end:
	if (error)
		kprintf("An error occurred: %d\n", error);
	return (error);
}
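
/*
 * Userland drives volume addition and removal through the HAMMER ioctl
 * interface.  A rough sketch of a caller, assuming the
 * HAMMERIOC_ADD_VOLUME command and the struct hammer_ioc_volume layout
 * from <vfs/hammer/hammer_ioctl.h> (device path and sizes are made up):
 *
 *	struct hammer_ioc_volume ioc;
 *
 *	bzero(&ioc, sizeof(ioc));
 *	snprintf(ioc.device_name, sizeof(ioc.device_name), "/dev/da1s1a");
 *	ioc.vol_size = new_volume_size;
 *	ioc.boot_area_size = HAMMER_BOOT_NOMBYTES;
 *	ioc.mem_area_size = HAMMER_MEM_NOMBYTES;
 *	if (ioctl(fd, HAMMERIOC_ADD_VOLUME, &ioc) < 0)
 *		err(1, "HAMMERIOC_ADD_VOLUME");
 *
 * Removal works the same way with HAMMERIOC_DEL_VOLUME and only
 * ioc.device_name filled in.
 */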

/*
 * Remove a volume.
 */
int
hammer_ioc_volume_del(hammer_transaction_t trans, hammer_inode_t ip,
		struct hammer_ioc_volume *ioc)
{
	struct hammer_mount *hmp = trans->hmp;
	struct mount *mp = hmp->mp;
	hammer_volume_t volume;
	int error = 0;

	if (mp->mnt_flag & MNT_RDONLY) {
		kprintf("Cannot delete volume from read-only HAMMER "
			"filesystem\n");
		return (EINVAL);
	}

	volume = NULL;

	/*
	 * Find the volume by its name.
	 */
	for (int vol_no = 0; vol_no < HAMMER_MAX_VOLUMES; ++vol_no) {
		volume = hammer_get_volume(hmp, vol_no, &error);
		if (volume == NULL && error == ENOENT) {
			/*
			 * Skip unused volume numbers
			 */
			error = 0;
			continue;
		}
		KKASSERT(volume != NULL && error == 0);
		if (strcmp(volume->vol_name, ioc->device_name) == 0) {
			break;
		}
		hammer_rel_volume(volume, 0);
		volume = NULL;
	}

	if (!volume) {
		kprintf("Couldn't find volume\n");
		return (EINVAL);
	}

	if (volume == trans->rootvol) {
		kprintf("Cannot remove root-volume\n");
		hammer_rel_volume(volume, 0);
		return (EINVAL);
	}
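
	/*
	 * Mark the volume as being removed so that no new big-blocks are
	 * allocated from it, then reblock the filesystem to move all data
	 * off of the volume.
	 */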
	hmp->volume_to_remove = volume->vol_no;

	struct hammer_ioc_reblock reblock;
	bzero(&reblock, sizeof(reblock));

	reblock.key_beg.localization = HAMMER_MIN_LOCALIZATION;
	reblock.key_beg.obj_id = HAMMER_MIN_OBJID;
	reblock.key_end.localization = HAMMER_MAX_LOCALIZATION;
	reblock.key_end.obj_id = HAMMER_MAX_OBJID;
	reblock.free_level = 0;

	error = hammer_ioc_reblock(trans, ip, &reblock);

	if (error) {
		kprintf("reblock failed: %d\n", error);
		hmp->volume_to_remove = -1;
		hammer_rel_volume(volume, 0);
		return (error);
	}

	hammer_sync_lock_sh(trans);
	hammer_lock_ex(&hmp->blkmap_lock);

	error = hammer_free_freemap(trans, volume);
	if (error) {
		kprintf("Failed to free volume\n");
		hmp->volume_to_remove = -1;
		hammer_rel_volume(volume, 0);
		hammer_unlock(&hmp->blkmap_lock);
		hammer_sync_unlock(trans);
		return (error);
	}

	hmp->volume_to_remove = -1;
	hammer_rel_volume(volume, 1);

	/* XXX: unload volume! */
	/*
	error = hammer_unload_volume(volume, NULL);
	if (error == -1) {
		kprintf("Failed to unload volume\n");
		hammer_unlock(&hmp->blkmap_lock);
		hammer_sync_unlock(trans);
		return (error);
	}
	*/

	--hmp->nvolumes;

	/*
	 * Set each volume's new value of the vol_count field.
	 */
	for (int vol_no = 0; vol_no < HAMMER_MAX_VOLUMES; ++vol_no) {
		volume = hammer_get_volume(hmp, vol_no, &error);
		if (volume == NULL && error == ENOENT) {
			/*
			 * Skip unused volume numbers
			 */
			error = 0;
			continue;
		}
		KKASSERT(volume != NULL && error == 0);
		hammer_modify_volume_field(trans, volume, vol_count);
		volume->ondisk->vol_count = hmp->nvolumes;
		hammer_modify_volume_done(volume);
		hammer_rel_volume(volume, 0);
	}

	hammer_unlock(&hmp->blkmap_lock);
	hammer_sync_unlock(trans);

	return (0);
}

/*
 * Iterate over all usable L1 entries of the volume and
 * the corresponding L2 entries.
 */
static int
hammer_iterate_l1l2_entries(hammer_transaction_t trans, hammer_volume_t volume,
	int (*callback)(hammer_transaction_t, hammer_buffer_t *,
		struct hammer_blockmap_layer1 *, struct hammer_blockmap_layer2 *,
		hammer_off_t, int, void *),
	void *data)
{
	struct hammer_mount *hmp = trans->hmp;
	hammer_blockmap_t freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
	hammer_buffer_t buffer = NULL;
	int error = 0;

	hammer_off_t phys_off;
	hammer_off_t block_off;
	hammer_off_t layer1_off;
	hammer_off_t layer2_off;
	hammer_off_t aligned_buf_end_off;
	struct hammer_blockmap_layer1 *layer1;
	struct hammer_blockmap_layer2 *layer2;

	/*
	 * Calculate the usable size of the volume, which
	 * must be aligned at a bigblock (8 MB) boundary.
	 */
	aligned_buf_end_off = (HAMMER_ENCODE_RAW_BUFFER(volume->ondisk->vol_no,
		(volume->ondisk->vol_buf_end - volume->ondisk->vol_buf_beg)
		& ~HAMMER_LARGEBLOCK_MASK64));

	/*
	 * Iterate the volume's address space in chunks of 4 TB, where each
	 * chunk consists of at least one physically available 8 MB bigblock.
	 *
	 * For each chunk we need one L1 entry and one L2 bigblock.
	 * We use the first bigblock of each chunk as L2 block.
	 */
	for (phys_off = HAMMER_ENCODE_RAW_BUFFER(volume->ondisk->vol_no, 0);
	     phys_off < aligned_buf_end_off;
	     phys_off += HAMMER_BLOCKMAP_LAYER2) {
		for (block_off = 0;
		     block_off < HAMMER_BLOCKMAP_LAYER2;
		     block_off += HAMMER_LARGEBLOCK_SIZE) {
			layer2_off = phys_off +
				HAMMER_BLOCKMAP_LAYER2_OFFSET(block_off);
			layer2 = hammer_bread(hmp, layer2_off, &error,
				&buffer);
			if (error)
				goto end;

			int zone;
			if (block_off == 0) {
				/*
				 * The first entry represents the L2 bigblock
				 * itself.
				 */
				zone = HAMMER_ZONE_FREEMAP_INDEX;
			} else if (phys_off + block_off < aligned_buf_end_off) {
				/*
				 * Available bigblock
				 */
				zone = 0;
			} else {
				/*
				 * Bigblock outside of physically available
				 * space
				 */
				zone = HAMMER_ZONE_UNAVAIL_INDEX;
			}

			error = callback(trans, &buffer, NULL, layer2, 0, zone,
				data);
			if (error)
				goto end;
		}

		layer1_off = freemap->phys_offset +
			HAMMER_BLOCKMAP_LAYER1_OFFSET(phys_off);
		layer1 = hammer_bread(hmp, layer1_off, &error, &buffer);
		if (error)
			goto end;

		error = callback(trans, &buffer, layer1, NULL, phys_off, 0,
			data);
		if (error)
			goto end;
	}

end:
	if (buffer) {
		hammer_rel_buffer(buffer, 0);
		buffer = NULL;
	}

	return error;
}
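
/*
 * Running totals collected by format_callback while formatting the
 * freemap of a new volume.
 */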
struct format_bigblock_stat {
	uint64_t total_free_bigblocks;
	uint64_t free_bigblocks;
};
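
/*
 * Initialize the freemap entries of a new volume.  The L2 entries of a
 * chunk are visited first; each available bigblock increments
 * free_bigblocks.  The chunk's L1 entry is visited last, at which point
 * the accumulated count is stored in layer1->blocks_free and the
 * per-chunk counter is reset.
 */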
static int
format_callback(hammer_transaction_t trans, hammer_buffer_t *bufferp,
	struct hammer_blockmap_layer1 *layer1,
	struct hammer_blockmap_layer2 *layer2,
	hammer_off_t phys_off,
	int layer2_zone,
	void *data)
{
	struct format_bigblock_stat *stat = (struct format_bigblock_stat *)data;

	if (layer1) {
		KKASSERT(layer1->phys_offset == HAMMER_BLOCKMAP_UNAVAIL);

		hammer_modify_buffer(trans, *bufferp, layer1, sizeof(*layer1));
		bzero(layer1, sizeof(*layer1));
		layer1->phys_offset = phys_off;
		layer1->blocks_free = stat->free_bigblocks;
		layer1->layer1_crc = crc32(layer1, HAMMER_LAYER1_CRCSIZE);
		hammer_modify_buffer_done(*bufferp);

		stat->total_free_bigblocks += stat->free_bigblocks;
		stat->free_bigblocks = 0;	/* reset */
	} else if (layer2) {
		hammer_modify_buffer(trans, *bufferp, layer2, sizeof(*layer2));
		bzero(layer2, sizeof(*layer2));

		layer2->zone = layer2_zone;

		switch (layer2->zone) {
		case HAMMER_ZONE_FREEMAP_INDEX:
			/*
			 * The first entry represents the L2 bigblock itself.
			 */
			layer2->append_off = HAMMER_LARGEBLOCK_SIZE;
			layer2->bytes_free = 0;
			break;

		case 0:
			/*
			 * Available bigblock
			 */
			layer2->append_off = 0;
			layer2->bytes_free = HAMMER_LARGEBLOCK_SIZE;
			++stat->free_bigblocks;
			break;

		case HAMMER_ZONE_UNAVAIL_INDEX:
			/*
			 * Bigblock outside of physically available space
			 */
			layer2->append_off = HAMMER_LARGEBLOCK_SIZE;
			layer2->bytes_free = 0;
			break;
		default:
			KKASSERT(0);
		}

		layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
		hammer_modify_buffer_done(*bufferp);
	} else {
		KKASSERT(0);
	}

	return 0;
}

static uint64_t
hammer_format_freemap(hammer_transaction_t trans, hammer_volume_t volume)
{
	int error = 0;

	struct format_bigblock_stat stat;
	stat.total_free_bigblocks = 0;
	stat.free_bigblocks = 0;

	error = hammer_iterate_l1l2_entries(trans, volume, format_callback,
		(void *)&stat);
	KKASSERT(error == 0);

	return stat.total_free_bigblocks;
}
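
/*
 * Undo the freemap formatting of a volume that is about to be removed.
 * L1 entries are flagged unavailable again.  An L2 entry describing a
 * bigblock that still holds data (i.e. one that reblocking failed to
 * empty) causes the operation to fail with EINVAL.
 */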
static int
free_callback(hammer_transaction_t trans, hammer_buffer_t *bufferp,
	struct hammer_blockmap_layer1 *layer1,
	struct hammer_blockmap_layer2 *layer2,
	hammer_off_t phys_off,
	int layer2_zone,
	void *data __unused)
{
	if (layer1) {
		/*
		 * Free the L1 entry
		 */
		hammer_modify_buffer(trans, *bufferp, layer1, sizeof(*layer1));
		bzero(layer1, sizeof(*layer1));
		layer1->phys_offset = HAMMER_BLOCKMAP_UNAVAIL;
		layer1->layer1_crc = crc32(layer1, HAMMER_LAYER1_CRCSIZE);
		hammer_modify_buffer_done(*bufferp);

		return 0;
	} else if (layer2) {
		switch (layer2_zone) {
		case HAMMER_ZONE_FREEMAP_INDEX:
		case HAMMER_ZONE_UNAVAIL_INDEX:
			return 0;
		case 0:
			if (layer2->append_off == 0 &&
			    layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE) {
				/*
				 * Completely free bigblock, nothing to do.
				 */
				return 0;
			} else {
				/* FIXME: bigblock still in use */
				return EINVAL;
			}
		default:
			/* FIXME: unexpected zone */
			return EINVAL;
		}
	} else {
		KKASSERT(0);
	}

	return EINVAL;
}

static int
hammer_free_freemap(hammer_transaction_t trans, hammer_volume_t volume)
{
	return hammer_iterate_l1l2_entries(trans, volume, free_callback, NULL);
}

/************************************************************************
 *				MISC					*
 ************************************************************************/
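
/*
 * Look up and open the device at dev_path for use as a HAMMER volume.
 * The device must be a disk, must not already be mounted and must not
 * be otherwise in use (vcount > 0 yields EBUSY).
 */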
static int
hammer_setup_device(struct vnode **devvpp, const char *dev_path, int ronly)
{
	int error;
	struct nlookupdata nd;

	/*
	 * Get the device vnode
	 */
	if (*devvpp == NULL) {
		error = nlookup_init(&nd, dev_path, UIO_SYSSPACE, NLC_FOLLOW);
		if (error == 0)
			error = nlookup(&nd);
		if (error == 0)
			error = cache_vref(&nd.nl_nch, nd.nl_cred, devvpp);
		nlookup_done(&nd);
	} else {
		error = 0;
	}

	if (error == 0) {
		if (vn_isdisk(*devvpp, &error)) {
			error = vfs_mountedon(*devvpp);
		}
	}
	if (error == 0 && vcount(*devvpp) > 0)
		error = EBUSY;
	if (error == 0) {
		vn_lock(*devvpp, LK_EXCLUSIVE | LK_RETRY);
		error = vinvalbuf(*devvpp, V_SAVE, 0, 0);
		if (error == 0) {
			error = VOP_OPEN(*devvpp,
					 (ronly ? FREAD : FREAD|FWRITE),
					 FSCRED, NULL);
		}
		vn_unlock(*devvpp);
	}
	if (error && *devvpp) {
		vrele(*devvpp);
		*devvpp = NULL;
	}
	return (error);
}

static void
hammer_close_device(struct vnode **devvpp, int ronly)
{
	if (*devvpp) {
		VOP_CLOSE(*devvpp, (ronly ? FREAD : FREAD|FWRITE));
		vinvalbuf(*devvpp, ronly ? 0 : V_SAVE, 0, 0);
		vrele(*devvpp);
		*devvpp = NULL;
	}
}
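
/*
 * Write a fresh HAMMER volume header to devvp.  A device that already
 * carries a valid HAMMER signature is rejected; it has to be erased
 * first.
 */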
static int
hammer_format_volume_header(struct hammer_mount *hmp, struct vnode *devvp,
	const char *vol_name, int vol_no, int vol_count,
	int64_t vol_size, int64_t boot_area_size, int64_t mem_area_size)
{
	struct buf *bp = NULL;
	struct hammer_volume_ondisk *ondisk;
	int error;

	/*
	 * Read the volume header from the device and do various
	 * sanity checks.
	 */
	KKASSERT(HAMMER_BUFSIZE >= sizeof(struct hammer_volume_ondisk));
	error = bread(devvp, 0LL, HAMMER_BUFSIZE, &bp);
	if (error || bp->b_bcount < sizeof(struct hammer_volume_ondisk))
		goto late_failure;

	ondisk = (struct hammer_volume_ondisk *)bp->b_data;

	/*
	 * Note that we do NOT allow the use of a device that already
	 * contains a valid HAMMER signature.  It has to be erased with
	 * dd first.
	 */
	if (ondisk->vol_signature == HAMMER_FSBUF_VOLUME) {
		kprintf("hammer_volume_add: Formatting of valid HAMMER volume "
			"%s denied. Erase with dd!\n", vol_name);
		error = EFTYPE;
		goto late_failure;
	}

	bzero(ondisk, sizeof(struct hammer_volume_ondisk));
	ksnprintf(ondisk->vol_name, sizeof(ondisk->vol_name), "%s", vol_name);
	ondisk->vol_fstype = hmp->rootvol->ondisk->vol_fstype;
	ondisk->vol_signature = HAMMER_FSBUF_VOLUME;
	ondisk->vol_fsid = hmp->fsid;
	ondisk->vol_rootvol = hmp->rootvol->vol_no;
	ondisk->vol_no = vol_no;
	ondisk->vol_count = vol_count;
	ondisk->vol_version = hmp->version;

	/*
	 * Reserve space for (future) header junk, set up our poor-man's
	 * bigblock allocator.
	 */
	int64_t vol_alloc = HAMMER_BUFSIZE * 16;

	ondisk->vol_bot_beg = vol_alloc;
	vol_alloc += boot_area_size;
	ondisk->vol_mem_beg = vol_alloc;
	vol_alloc += mem_area_size;

	/*
	 * The remaining area is the zone 2 buffer allocation area.
	 */
	ondisk->vol_buf_beg = vol_alloc;
	ondisk->vol_buf_end = vol_size & ~(int64_t)HAMMER_BUFMASK;

	if (ondisk->vol_buf_end < ondisk->vol_buf_beg) {
		kprintf("volume %d %s is too small to hold the volume header\n",
			ondisk->vol_no, ondisk->vol_name);
		error = EFTYPE;
		goto late_failure;
	}

	ondisk->vol_nblocks = (ondisk->vol_buf_end - ondisk->vol_buf_beg) /
			      HAMMER_BUFSIZE;
	ondisk->vol_blocksize = HAMMER_BUFSIZE;

	/*
	 * Write volume header to disk
	 */
	error = bwrite(bp);
	bp = NULL;

late_failure:
	if (bp)
		brelse(bp);
	return (error);
}