HAMMER 28/many: Implement zoned blockmap
[dragonfly.git] / sbin / hammer / ondisk.c
blob1a87884ad37055e3bb06b63a5d2166aac352f407
/*
 * Copyright (c) 2007 The DragonFly Project. All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sbin/hammer/ondisk.c,v 1.11 2008/02/10 09:50:55 dillon Exp $
 */

37 #include <sys/types.h>
38 #include <assert.h>
39 #include <stdio.h>
40 #include <stdlib.h>
41 #include <stdarg.h>
42 #include <string.h>
43 #include <unistd.h>
44 #include <err.h>
45 #include <fcntl.h>
46 #include "hammer_util.h"
48 static void *alloc_blockmap(int zone, int bytes, hammer_off_t *result_offp,
49 struct buffer_info **bufferp);
50 static hammer_off_t alloc_bigblock(void);
51 #if 0
52 static void init_fifo_head(hammer_fifo_head_t head, u_int16_t hdr_type);
53 static hammer_off_t hammer_alloc_fifo(int32_t base_bytes, int32_t ext_bytes,
54 struct buffer_info **bufp, u_int16_t hdr_type);
55 static void readhammerbuf(struct volume_info *vol, void *data,
56 int64_t offset);
57 #endif
58 static void writehammerbuf(struct volume_info *vol, const void *data,
59 int64_t offset);
62 uuid_t Hammer_FSType;
63 uuid_t Hammer_FSId;
64 int64_t BootAreaSize;
65 int64_t MemAreaSize;
66 int UsingSuperClusters;
67 int NumVolumes;
68 int RootVolNo = -1;
69 struct volume_list VolList = TAILQ_HEAD_INITIALIZER(VolList);
72 * Lookup the requested information structure and related on-disk buffer.
73 * Missing structures are created.
75 struct volume_info *
76 setup_volume(int32_t vol_no, const char *filename, int isnew, int oflags)
78 struct volume_info *vol;
79 struct volume_info *scan;
80 struct hammer_volume_ondisk *ondisk;
81 int n;
84 * Allocate the volume structure
86 vol = malloc(sizeof(*vol));
87 bzero(vol, sizeof(*vol));
88 TAILQ_INIT(&vol->buffer_list);
89 vol->name = strdup(filename);
90 vol->fd = open(filename, oflags);
91 if (vol->fd < 0) {
92 free(vol->name);
93 free(vol);
94 err(1, "setup_volume: %s: Open failed", filename);
98 * Read or initialize the volume header
100 vol->ondisk = ondisk = malloc(HAMMER_BUFSIZE);
101 if (isnew) {
102 bzero(ondisk, HAMMER_BUFSIZE);
103 } else {
104 n = pread(vol->fd, ondisk, HAMMER_BUFSIZE, 0);
105 if (n != HAMMER_BUFSIZE) {
106 err(1, "setup_volume: %s: Read failed at offset 0",
107 filename);
109 vol_no = ondisk->vol_no;
110 if (RootVolNo < 0) {
111 RootVolNo = ondisk->vol_rootvol;
112 } else if (RootVolNo != (int)ondisk->vol_rootvol) {
113 errx(1, "setup_volume: %s: root volume disagreement: "
114 "%d vs %d",
115 vol->name, RootVolNo, ondisk->vol_rootvol);
118 if (bcmp(&Hammer_FSType, &ondisk->vol_fstype, sizeof(Hammer_FSType)) != 0) {
119 errx(1, "setup_volume: %s: Header does not indicate "
120 "that this is a hammer volume", vol->name);
122 if (TAILQ_EMPTY(&VolList)) {
123 Hammer_FSId = vol->ondisk->vol_fsid;
124 } else if (bcmp(&Hammer_FSId, &ondisk->vol_fsid, sizeof(Hammer_FSId)) != 0) {
125 errx(1, "setup_volume: %s: FSId does match other "
126 "volumes!", vol->name);
129 vol->vol_no = vol_no;
131 if (isnew) {
132 /*init_fifo_head(&ondisk->head, HAMMER_HEAD_TYPE_VOL);*/
133 vol->cache.modified = 1;
137 * Link the volume structure in
139 TAILQ_FOREACH(scan, &VolList, entry) {
140 if (scan->vol_no == vol_no) {
141 errx(1, "setup_volume %s: Duplicate volume number %d "
142 "against %s", filename, vol_no, scan->name);
145 TAILQ_INSERT_TAIL(&VolList, vol, entry);
146 return(vol);
149 struct volume_info *
150 get_volume(int32_t vol_no)
152 struct volume_info *vol;
154 TAILQ_FOREACH(vol, &VolList, entry) {
155 if (vol->vol_no == vol_no)
156 break;
158 if (vol == NULL)
159 errx(1, "get_volume: Volume %d does not exist!", vol_no);
160 ++vol->cache.refs;
161 /* not added to or removed from hammer cache */
162 return(vol);
165 void
166 rel_volume(struct volume_info *volume)
168 /* not added to or removed from hammer cache */
169 --volume->cache.refs;
173 * Acquire the specified buffer.
175 struct buffer_info *
176 get_buffer(hammer_off_t buf_offset, int isnew)
178 void *ondisk;
179 struct buffer_info *buf;
180 struct volume_info *volume;
181 int n;
182 int vol_no;
184 assert((buf_offset & HAMMER_OFF_ZONE_MASK) == HAMMER_ZONE_RAW_BUFFER);
186 vol_no = HAMMER_VOL_DECODE(buf_offset);
187 volume = get_volume(vol_no);
188 buf_offset &= ~HAMMER_BUFMASK64;
190 TAILQ_FOREACH(buf, &volume->buffer_list, entry) {
191 if (buf->buf_offset == buf_offset)
192 break;
194 if (buf == NULL) {
195 buf = malloc(sizeof(*buf));
196 bzero(buf, sizeof(*buf));
197 buf->buf_offset = buf_offset;
198 buf->buf_disk_offset = volume->ondisk->vol_buf_beg +
199 (buf_offset & HAMMER_OFF_SHORT_MASK);
200 buf->volume = volume;
201 TAILQ_INSERT_TAIL(&volume->buffer_list, buf, entry);
202 ++volume->cache.refs;
203 buf->cache.u.buffer = buf;
204 hammer_cache_add(&buf->cache, ISBUFFER);
206 ++buf->cache.refs;
207 hammer_cache_flush();
208 if ((ondisk = buf->ondisk) == NULL) {
209 buf->ondisk = ondisk = malloc(HAMMER_BUFSIZE);
210 if (isnew == 0) {
211 n = pread(volume->fd, ondisk, HAMMER_BUFSIZE,
212 buf->buf_disk_offset);
213 if (n != HAMMER_BUFSIZE) {
214 err(1, "get_buffer: %s:%016llx Read failed at "
215 "offset %lld",
216 volume->name, buf->buf_offset,
217 buf->buf_disk_offset);
221 if (isnew) {
222 bzero(ondisk, HAMMER_BUFSIZE);
223 buf->cache.modified = 1;
225 return(buf);
228 void
229 rel_buffer(struct buffer_info *buffer)
231 struct volume_info *volume;
233 assert(buffer->cache.refs > 0);
234 if (--buffer->cache.refs == 0) {
235 if (buffer->cache.delete) {
236 volume = buffer->volume;
237 if (buffer->cache.modified)
238 flush_buffer(buffer);
239 TAILQ_REMOVE(&volume->buffer_list, buffer, entry);
240 hammer_cache_del(&buffer->cache);
241 free(buffer->ondisk);
242 free(buffer);
243 rel_volume(volume);
248 void *
249 get_buffer_data(hammer_off_t buf_offset, struct buffer_info **bufferp,
250 int isnew)
252 struct buffer_info *buffer;
254 if (*bufferp) {
255 rel_buffer(*bufferp);
257 buffer = *bufferp = get_buffer(buf_offset, isnew);
258 return((char *)buffer->ondisk + ((int32_t)buf_offset & HAMMER_BUFMASK));
262 * Retrieve a pointer to a B-Tree node given a cluster offset. The underlying
263 * bufp is freed if non-NULL and a referenced buffer is loaded into it.
265 hammer_node_ondisk_t
266 get_node(hammer_off_t node_offset, struct buffer_info **bufp)
268 struct buffer_info *buf;
270 if (*bufp)
271 rel_buffer(*bufp);
272 *bufp = buf = get_buffer(node_offset, 0);
273 return((void *)((char *)buf->ondisk +
274 (int32_t)(node_offset & HAMMER_BUFMASK)));
278 * Allocate HAMMER elements - btree nodes, data storage, and record elements
280 * NOTE: hammer_alloc_fifo() initializes the fifo header for the returned
281 * item and zero's out the remainder, so don't bzero() it.
283 void *
284 alloc_btree_element(hammer_off_t *offp)
286 struct buffer_info *buffer = NULL;
287 hammer_node_ondisk_t node;
289 node = alloc_blockmap(HAMMER_ZONE_BTREE_INDEX, sizeof(*node),
290 offp, &buffer);
291 bzero(node, sizeof(*node));
292 /* XXX buffer not released, pointer remains valid */
293 return(node);
296 hammer_record_ondisk_t
297 alloc_record_element(hammer_off_t *offp, int32_t data_len, void **datap)
299 struct buffer_info *record_buffer = NULL;
300 struct buffer_info *data_buffer = NULL;
301 hammer_record_ondisk_t rec;
303 rec = alloc_blockmap(HAMMER_ZONE_RECORD_INDEX, sizeof(*rec),
304 offp, &record_buffer);
305 bzero(rec, sizeof(*rec));
307 if (data_len >= HAMMER_BUFSIZE) {
308 assert(data_len <= HAMMER_BUFSIZE); /* just one buffer */
309 *datap = alloc_blockmap(HAMMER_ZONE_LARGE_DATA_INDEX, data_len,
310 &rec->base.data_off, &data_buffer);
311 rec->base.data_len = data_len;
312 bzero(*datap, data_len);
313 } else if (data_len) {
314 *datap = alloc_blockmap(HAMMER_ZONE_SMALL_DATA_INDEX, data_len,
315 &rec->base.data_off, &data_buffer);
316 rec->base.data_len = data_len;
317 bzero(*datap, data_len);
318 } else {
319 *datap = NULL;
321 /* XXX buf not released, ptr remains valid */
322 return(rec);
326 * Format a new blockmap
328 void
329 format_blockmap(hammer_blockmap_entry_t blockmap, hammer_off_t zone_off)
331 blockmap->phys_offset = alloc_bigblock();
332 blockmap->alloc_offset = zone_off;
335 static
336 void *
337 alloc_blockmap(int zone, int bytes, hammer_off_t *result_offp,
338 struct buffer_info **bufferp)
340 struct buffer_info *buffer;
341 struct volume_info *volume;
342 hammer_blockmap_entry_t rootmap;
343 hammer_blockmap_entry_t blockmap;
344 void *ptr;
345 int i;
347 volume = get_volume(RootVolNo);
349 rootmap = &volume->ondisk->vol0_blockmap[zone];
352 * Alignment and buffer-boundary issues
354 bytes = (bytes + 7) & ~7;
355 if ((rootmap->phys_offset ^ (rootmap->phys_offset + bytes - 1)) &
356 ~HAMMER_BUFMASK64) {
357 volume->cache.modified = 1;
358 rootmap->phys_offset = (rootmap->phys_offset + bytes) &
359 ~HAMMER_BUFMASK64;
363 * Dive layer 2
365 i = (rootmap->alloc_offset >> (HAMMER_LARGEBLOCK_BITS +
366 HAMMER_BLOCKMAP_BITS)) & HAMMER_BLOCKMAP_RADIX_MASK;
368 blockmap = get_buffer_data(rootmap->phys_offset + i * sizeof(*blockmap),
369 bufferp, 0);
370 buffer = *bufferp;
371 if ((rootmap->alloc_offset & HAMMER_LARGEBLOCK_LAYER1_MASK) == 0) {
372 buffer->cache.modified = 1;
373 bzero(blockmap, sizeof(*blockmap));
374 blockmap->phys_offset = alloc_bigblock();
378 * Dive layer 1
380 i = (rootmap->alloc_offset >> HAMMER_LARGEBLOCK_BITS) &
381 HAMMER_BLOCKMAP_RADIX_MASK;
383 blockmap = get_buffer_data(
384 blockmap->phys_offset + i * sizeof(*blockmap), bufferp, 0);
385 buffer = *bufferp;
387 if ((rootmap->alloc_offset & HAMMER_LARGEBLOCK_MASK64) == 0) {
388 buffer->cache.modified = 1;
389 bzero(blockmap, sizeof(*blockmap));
390 blockmap->phys_offset = alloc_bigblock();
391 blockmap->bytes_free = HAMMER_LARGEBLOCK_SIZE;
394 buffer->cache.modified = 1;
395 volume->cache.modified = 1;
396 blockmap->bytes_free -= bytes;
397 *result_offp = rootmap->alloc_offset;
398 rootmap->alloc_offset += bytes;
400 i = (rootmap->phys_offset >> HAMMER_BUFFER_BITS) &
401 HAMMER_BUFFERS_PER_LARGEBLOCK_MASK;
402 ptr = get_buffer_data(
403 blockmap->phys_offset + i * HAMMER_BUFSIZE +
404 ((int32_t)*result_offp & HAMMER_BUFMASK), bufferp, 0);
405 buffer->cache.modified = 1;
407 rel_volume(volume);
408 return(ptr);
411 static
412 hammer_off_t
413 alloc_bigblock(void)
415 struct volume_info *volume;
416 hammer_off_t result_offset;
418 volume = get_volume(RootVolNo);
419 result_offset = volume->ondisk->vol0_free_off;
420 volume->ondisk->vol0_free_off += HAMMER_LARGEBLOCK_SIZE;
421 if ((volume->ondisk->vol0_free_off & HAMMER_OFF_SHORT_MASK) >
422 (hammer_off_t)(volume->ondisk->vol_buf_end - volume->ondisk->vol_buf_beg)) {
423 panic("alloc_bigblock: Ran out of room, filesystem too small");
425 rel_volume(volume);
426 return(result_offset);
#if 0
/*
 * Reserve space from the FIFO.  Make sure that bytes does not cross a
 * record boundary.
 *
 * Zero out base_bytes and initialize the fifo head and tail.  The
 * data area is not zerod.
 *
 * (Compiled out.)
 */
static
hammer_off_t
hammer_alloc_fifo(int32_t base_bytes, int32_t ext_bytes,
		  struct buffer_info **bufp, u_int16_t hdr_type)
{
	struct buffer_info *buf;
	struct volume_info *volume;
	hammer_fifo_head_t head;
	hammer_fifo_tail_t tail;
	hammer_off_t off;
	int32_t total;

	/* Head + extension + tail, rounded up to the head alignment. */
	total = base_bytes + ext_bytes + HAMMER_TAIL_ONDISK_SIZE;
	total = (total + HAMMER_HEAD_ALIGN_MASK) & ~HAMMER_HEAD_ALIGN_MASK;

	volume = get_volume(RootVolNo);
	off = volume->ondisk->vol0_fifo_end;

	/*
	 * For now don't deal with transitions across buffer boundaries,
	 * only newfs_hammer uses this function.
	 *
	 * NOTE(review): the two sides use different mask macros
	 * (HAMMER_BUFMASK64 vs HAMMER_BUFMASK) - confirm this is intended.
	 */
	assert((off & ~HAMMER_BUFMASK64) ==
	       ((off + total) & ~HAMMER_BUFMASK));

	buf = get_buffer(off, 0);
	*bufp = buf;

	buf->cache.modified = 1;
	volume->cache.modified = 1;

	head = (void *)((char *)buf->ondisk + ((int32_t)off & HAMMER_BUFMASK));
	bzero(head, base_bytes);

	head->hdr_signature = HAMMER_HEAD_SIGNATURE;
	head->hdr_type = hdr_type;
	head->hdr_size = total;
	head->hdr_seq = volume->ondisk->vol0_next_seq++;

	tail = (void *)((char *)head + total - HAMMER_TAIL_ONDISK_SIZE);
	tail->tail_signature = HAMMER_TAIL_SIGNATURE;
	tail->tail_type = hdr_type;
	tail->tail_size = total;

	volume->ondisk->vol0_fifo_end += total;
	volume->cache.modified = 1;

	rel_volume(volume);

	return(off);
}

#endif

491 * Flush various tracking structures to disk
495 * Flush various tracking structures to disk
497 void
498 flush_all_volumes(void)
500 struct volume_info *vol;
502 TAILQ_FOREACH(vol, &VolList, entry)
503 flush_volume(vol);
506 void
507 flush_volume(struct volume_info *volume)
509 struct buffer_info *buffer;
511 TAILQ_FOREACH(buffer, &volume->buffer_list, entry)
512 flush_buffer(buffer);
513 writehammerbuf(volume, volume->ondisk, 0);
514 volume->cache.modified = 0;
517 void
518 flush_buffer(struct buffer_info *buffer)
520 writehammerbuf(buffer->volume, buffer->ondisk, buffer->buf_disk_offset);
521 buffer->cache.modified = 0;
#if 0
/*
 * Generic buffer initialization
 *
 * Stamp a fifo head with the signature and the given type and reset its
 * size, crc and sequence fields to zero.  (Compiled out.)
 */
static void
init_fifo_head(hammer_fifo_head_t head, u_int16_t hdr_type)
{
	head->hdr_signature = HAMMER_HEAD_SIGNATURE;
	head->hdr_type = hdr_type;

	head->hdr_size = 0;
	head->hdr_crc = 0;
	head->hdr_seq = 0;
}

#endif

#if 0
/*
 * Core I/O operations
 *
 * Read one HAMMER_BUFSIZE buffer from the volume at the given byte
 * offset; anything other than a full read is fatal.  (Compiled out.)
 */
static void
readhammerbuf(struct volume_info *vol, void *data, int64_t offset)
{
	ssize_t got;

	got = pread(vol->fd, data, HAMMER_BUFSIZE, offset);
	if (got == HAMMER_BUFSIZE)
		return;
	err(1, "Read volume %d (%s)", vol->vol_no, vol->name);
}

#endif

556 static void
557 writehammerbuf(struct volume_info *vol, const void *data, int64_t offset)
559 ssize_t n;
561 n = pwrite(vol->fd, data, HAMMER_BUFSIZE, offset);
562 if (n != HAMMER_BUFSIZE)
563 err(1, "Write volume %d (%s)", vol->vol_no, vol->name);
/*
 * Print a printf-style message followed by a newline on stderr and exit
 * with status 1.  Does not return.
 */
void
panic(const char *ctl, ...)
{
	va_list ap;

	va_start(ap, ctl);
	vfprintf(stderr, ctl, ap);
	va_end(ap);

	fputc('\n', stderr);
	exit(1);
}