2 * Copyright (c) 2007 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * $DragonFly: src/sbin/hammer/ondisk.c,v 1.11 2008/02/10 09:50:55 dillon Exp $
37 #include <sys/types.h>
46 #include "hammer_util.h"
/*
 * Forward declarations for the file-local helpers (alloc_blockmap,
 * alloc_bigblock, init_fifo_head, hammer_alloc_fifo, readhammerbuf,
 * writehammerbuf), followed by two globals: the UsingSuperClusters flag
 * and VolList, the tail queue of volumes, statically initialized empty.
 *
 * NOTE(review): the readhammerbuf and writehammerbuf prototypes are cut
 * short in this extract (their trailing parameter lines are absent), and
 * other globals referenced later in the file (RootVolNo, Hammer_FSType,
 * Hammer_FSId) are not visible here -- confirm against the full file.
 */
48 static void *alloc_blockmap(int zone
, int bytes
, hammer_off_t
*result_offp
,
49 struct buffer_info
**bufferp
);
50 static hammer_off_t
alloc_bigblock(void);
52 static void init_fifo_head(hammer_fifo_head_t head
, u_int16_t hdr_type
);
53 static hammer_off_t
hammer_alloc_fifo(int32_t base_bytes
, int32_t ext_bytes
,
54 struct buffer_info
**bufp
, u_int16_t hdr_type
);
55 static void readhammerbuf(struct volume_info
*vol
, void *data
,
58 static void writehammerbuf(struct volume_info
*vol
, const void *data
,
66 int UsingSuperClusters
;
69 struct volume_list VolList
= TAILQ_HEAD_INITIALIZER(VolList
);
72 * Lookup the requested information structure and related on-disk buffer.
73 * Missing structures are created.
/*
 * setup_volume: allocate and register the in-memory volume_info for
 * filename.
 *
 * Visible steps:
 *  - the structure is malloc()ed, zeroed, its buffer list initialized,
 *    the name duplicated and the file opened with oflags; an open
 *    failure is fatal via err(1).
 *  - a HAMMER_BUFSIZE header buffer is allocated and is either zeroed
 *    or filled with pread() from offset 0 (a short read is fatal).
 *  - on the read path vol_no is taken from the header, RootVolNo is set
 *    from or checked against vol_rootvol, vol_fstype must equal
 *    Hammer_FSType, and vol_fsid must equal Hammer_FSId (which is
 *    latched from this header when VolList is still empty).
 *  - a volume already in VolList with the same vol_no is fatal;
 *    otherwise the new volume is appended to VolList.
 *
 * NOTE(review): several source lines (conditions, closing braces, the
 * return statement) are absent from this extract; presumably isnew
 * selects between the zero-fill and pread paths -- confirm against the
 * full file.
 * NOTE(review): the FSId mismatch message reads as -- FSId does match
 * other volumes -- and presumably should say does not match.
 * NOTE(review): the malloc() and strdup() results are used without a
 * NULL check in the visible lines.
 */
76 setup_volume(int32_t vol_no
, const char *filename
, int isnew
, int oflags
)
78 struct volume_info
*vol
;
79 struct volume_info
*scan
;
80 struct hammer_volume_ondisk
*ondisk
;
84 * Allocate the volume structure
86 vol
= malloc(sizeof(*vol
));
87 bzero(vol
, sizeof(*vol
));
88 TAILQ_INIT(&vol
->buffer_list
);
89 vol
->name
= strdup(filename
);
90 vol
->fd
= open(filename
, oflags
);
94 err(1, "setup_volume: %s: Open failed", filename
);
98 * Read or initialize the volume header
100 vol
->ondisk
= ondisk
= malloc(HAMMER_BUFSIZE
);
102 bzero(ondisk
, HAMMER_BUFSIZE
);
104 n
= pread(vol
->fd
, ondisk
, HAMMER_BUFSIZE
, 0);
105 if (n
!= HAMMER_BUFSIZE
) {
106 err(1, "setup_volume: %s: Read failed at offset 0",
109 vol_no
= ondisk
->vol_no
;
111 RootVolNo
= ondisk
->vol_rootvol
;
112 } else if (RootVolNo
!= (int)ondisk
->vol_rootvol
) {
113 errx(1, "setup_volume: %s: root volume disagreement: "
115 vol
->name
, RootVolNo
, ondisk
->vol_rootvol
);
118 if (bcmp(&Hammer_FSType
, &ondisk
->vol_fstype
, sizeof(Hammer_FSType
)) != 0) {
119 errx(1, "setup_volume: %s: Header does not indicate "
120 "that this is a hammer volume", vol
->name
);
122 if (TAILQ_EMPTY(&VolList
)) {
123 Hammer_FSId
= vol
->ondisk
->vol_fsid
;
124 } else if (bcmp(&Hammer_FSId
, &ondisk
->vol_fsid
, sizeof(Hammer_FSId
)) != 0) {
125 errx(1, "setup_volume: %s: FSId does match other "
126 "volumes!", vol
->name
);
129 vol
->vol_no
= vol_no
;
132 /*init_fifo_head(&ondisk->head, HAMMER_HEAD_TYPE_VOL);*/
133 vol
->cache
.modified
= 1;
137 * Link the volume structure in
139 TAILQ_FOREACH(scan
, &VolList
, entry
) {
140 if (scan
->vol_no
== vol_no
) {
141 errx(1, "setup_volume %s: Duplicate volume number %d "
142 "against %s", filename
, vol_no
, scan
->name
);
145 TAILQ_INSERT_TAIL(&VolList
, vol
, entry
);
/*
 * get_volume: look up a registered volume by its volume number.
 *
 * Walks VolList comparing each vol_no against the argument; a volume
 * that does not exist is reported with errx(1), which does not return.
 *
 * NOTE(review): the loop exit, the not-found test and the return
 * statement are absent from this extract.  rel_volume() decrements
 * cache.refs, so presumably a reference is taken here before
 * returning -- confirm against the full file.
 */
150 get_volume(int32_t vol_no
)
152 struct volume_info
*vol
;
154 TAILQ_FOREACH(vol
, &VolList
, entry
) {
155 if (vol
->vol_no
== vol_no
)
159 errx(1, "get_volume: Volume %d does not exist!", vol_no
);
161 /* not added to or removed from hammer cache */
166 rel_volume(struct volume_info
*volume
)
168 /* not added to or removed from hammer cache */
169 --volume
->cache
.refs
;
173 * Acquire the specified buffer.
/*
 * get_buffer: return the buffer_info covering buf_offset.
 *
 * Visible steps:
 *  - buf_offset must be in HAMMER_ZONE_RAW_BUFFER (asserted).
 *  - the volume is resolved from HAMMER_VOL_DECODE(buf_offset) through
 *    get_volume(), and buf_offset is rounded down to a buffer boundary
 *    by clearing HAMMER_BUFMASK64.
 *  - the buffer list of the volume is searched for a matching
 *    buf_offset; a new buffer_info is malloc()ed, zeroed, given its disk
 *    offset (vol_buf_beg plus the short offset bits), linked onto the
 *    volume list, a volume reference is added, and it is registered
 *    with hammer_cache_add().
 *  - hammer_cache_flush() is called, and when the buffer has no ondisk
 *    image yet one is malloc()ed and read with pread(); a short read is
 *    fatal via err(1).
 *  - the image is zeroed and the buffer marked modified.
 *
 * NOTE(review): the conditions guarding creation, the pread and the
 * final bzero are absent from this extract (presumably a not-found test
 * and tests on isnew), as are the buffer reference increment and the
 * return -- confirm against the full file.
 */
176 get_buffer(hammer_off_t buf_offset
, int isnew
)
179 struct buffer_info
*buf
;
180 struct volume_info
*volume
;
184 assert((buf_offset
& HAMMER_OFF_ZONE_MASK
) == HAMMER_ZONE_RAW_BUFFER
);
186 vol_no
= HAMMER_VOL_DECODE(buf_offset
);
187 volume
= get_volume(vol_no
);
188 buf_offset
&= ~HAMMER_BUFMASK64
;
190 TAILQ_FOREACH(buf
, &volume
->buffer_list
, entry
) {
191 if (buf
->buf_offset
== buf_offset
)
195 buf
= malloc(sizeof(*buf
));
196 bzero(buf
, sizeof(*buf
));
197 buf
->buf_offset
= buf_offset
;
198 buf
->buf_disk_offset
= volume
->ondisk
->vol_buf_beg
+
199 (buf_offset
& HAMMER_OFF_SHORT_MASK
);
200 buf
->volume
= volume
;
201 TAILQ_INSERT_TAIL(&volume
->buffer_list
, buf
, entry
);
202 ++volume
->cache
.refs
;
203 buf
->cache
.u
.buffer
= buf
;
204 hammer_cache_add(&buf
->cache
, ISBUFFER
);
207 hammer_cache_flush();
208 if ((ondisk
= buf
->ondisk
) == NULL
) {
209 buf
->ondisk
= ondisk
= malloc(HAMMER_BUFSIZE
);
211 n
= pread(volume
->fd
, ondisk
, HAMMER_BUFSIZE
,
212 buf
->buf_disk_offset
);
213 if (n
!= HAMMER_BUFSIZE
) {
214 err(1, "get_buffer: %s:%016llx Read failed at "
216 volume
->name
, buf
->buf_offset
,
217 buf
->buf_disk_offset
);
222 bzero(ondisk
, HAMMER_BUFSIZE
);
223 buf
->cache
.modified
= 1;
/*
 * rel_buffer: drop one reference on a buffer.
 *
 * The reference count must be positive (asserted).  When it reaches
 * zero and cache.delete is set, the buffer is written out through
 * flush_buffer() if modified, unlinked from the buffer list of its
 * volume, removed from the hammer cache and its ondisk image is freed.
 *
 * NOTE(review): the tail of the function is absent from this extract,
 * so it cannot be seen here whether the buffer_info itself is freed or
 * the volume reference is released -- confirm against the full file.
 */
229 rel_buffer(struct buffer_info
*buffer
)
231 struct volume_info
*volume
;
233 assert(buffer
->cache
.refs
> 0);
234 if (--buffer
->cache
.refs
== 0) {
235 if (buffer
->cache
.delete) {
236 volume
= buffer
->volume
;
237 if (buffer
->cache
.modified
)
238 flush_buffer(buffer
);
239 TAILQ_REMOVE(&volume
->buffer_list
, buffer
, entry
);
240 hammer_cache_del(&buffer
->cache
);
241 free(buffer
->ondisk
);
/*
 * get_buffer_data: return a pointer to the byte at buf_offset inside
 * its buffer.
 *
 * The buffer previously held in *bufferp is released with rel_buffer(),
 * the buffer covering buf_offset is acquired with get_buffer() and
 * stored back into *bufferp, and the returned pointer is the ondisk
 * image plus the offset within the buffer (buf_offset masked with
 * HAMMER_BUFMASK).
 *
 * NOTE(review): the last parameter line (isnew is used below but its
 * declaration is not visible) and any guard around the rel_buffer()
 * call are absent from this extract -- confirm against the full file.
 */
249 get_buffer_data(hammer_off_t buf_offset
, struct buffer_info
**bufferp
,
252 struct buffer_info
*buffer
;
255 rel_buffer(*bufferp
);
257 buffer
= *bufferp
= get_buffer(buf_offset
, isnew
);
258 return((char *)buffer
->ondisk
+ ((int32_t)buf_offset
& HAMMER_BUFMASK
));
262 * Retrieve a pointer to a B-Tree node given a cluster offset. The underlying
263 * bufp is freed if non-NULL and a referenced buffer is loaded into it.
/*
 * get_node: return a pointer to the B-Tree node stored at node_offset.
 *
 * The covering buffer is acquired with get_buffer() (isnew = 0) and
 * stored in *bufp; the result points into the ondisk image at the
 * offset within the buffer (node_offset masked with HAMMER_BUFMASK).
 *
 * NOTE(review): the lines between the local declaration and the
 * get_buffer() call are absent from this extract; presumably a buffer
 * already held in *bufp is released there -- confirm against the full
 * file.
 */
266 get_node(hammer_off_t node_offset
, struct buffer_info
**bufp
)
268 struct buffer_info
*buf
;
272 *bufp
= buf
= get_buffer(node_offset
, 0);
273 return((void *)((char *)buf
->ondisk
+
274 (int32_t)(node_offset
& HAMMER_BUFMASK
)));
278 * Allocate HAMMER elements - btree nodes, data storage, and record elements
280 * NOTE: hammer_alloc_fifo() initializes the fifo header for the returned
281 * item and zero's out the remainder, so don't bzero() it.
/*
 * alloc_btree_element: allocate and zero one B-Tree node.
 *
 * Space of sizeof(*node) bytes is obtained from alloc_blockmap() in
 * HAMMER_ZONE_BTREE_INDEX and the node is cleared with bzero().  The
 * buffer backing the node is intentionally not released, so the node
 * pointer stays valid.
 *
 * NOTE(review): the continuation of the alloc_blockmap() call and the
 * return statement are absent from this extract; presumably offp
 * receives the zone offset of the node -- confirm against the full
 * file.
 */
284 alloc_btree_element(hammer_off_t
*offp
)
286 struct buffer_info
*buffer
= NULL
;
287 hammer_node_ondisk_t node
;
289 node
= alloc_blockmap(HAMMER_ZONE_BTREE_INDEX
, sizeof(*node
),
291 bzero(node
, sizeof(*node
));
292 /* XXX buffer not released, pointer remains valid */
/*
 * alloc_record_element: allocate a zeroed record and, optionally, its
 * data area.
 *
 * The record itself comes from alloc_blockmap() in
 * HAMMER_ZONE_RECORD_INDEX, with its offset stored through offp, and is
 * cleared with bzero().  For the data area:
 *  - data_len >= HAMMER_BUFSIZE: asserted to be at most HAMMER_BUFSIZE,
 *    so only exactly one full buffer is accepted; allocated from
 *    HAMMER_ZONE_LARGE_DATA_INDEX.
 *  - otherwise, non-zero data_len: allocated from
 *    HAMMER_ZONE_SMALL_DATA_INDEX.
 * In both cases the data offset and length are recorded in rec->base,
 * *datap points at the data and the data is zeroed.  Neither backing
 * buffer is released, so both pointers stay valid.
 *
 * NOTE(review): the lines after the second branch (including whatever
 * happens to *datap when data_len is zero, and the return) are absent
 * from this extract -- confirm against the full file.
 */
296 hammer_record_ondisk_t
297 alloc_record_element(hammer_off_t
*offp
, int32_t data_len
, void **datap
)
299 struct buffer_info
*record_buffer
= NULL
;
300 struct buffer_info
*data_buffer
= NULL
;
301 hammer_record_ondisk_t rec
;
303 rec
= alloc_blockmap(HAMMER_ZONE_RECORD_INDEX
, sizeof(*rec
),
304 offp
, &record_buffer
);
305 bzero(rec
, sizeof(*rec
));
307 if (data_len
>= HAMMER_BUFSIZE
) {
308 assert(data_len
<= HAMMER_BUFSIZE
); /* just one buffer */
309 *datap
= alloc_blockmap(HAMMER_ZONE_LARGE_DATA_INDEX
, data_len
,
310 &rec
->base
.data_off
, &data_buffer
);
311 rec
->base
.data_len
= data_len
;
312 bzero(*datap
, data_len
);
313 } else if (data_len
) {
314 *datap
= alloc_blockmap(HAMMER_ZONE_SMALL_DATA_INDEX
, data_len
,
315 &rec
->base
.data_off
, &data_buffer
);
316 rec
->base
.data_len
= data_len
;
317 bzero(*datap
, data_len
);
321 /* XXX buf not released, ptr remains valid */
326 * Format a new blockmap
329 format_blockmap(hammer_blockmap_entry_t blockmap
, hammer_off_t zone_off
)
331 blockmap
->phys_offset
= alloc_bigblock();
332 blockmap
->alloc_offset
= zone_off
;
/*
 * alloc_blockmap: carve bytes out of a zone through the blockmap of the
 * root volume and return a pointer to the new space.
 *
 * Visible steps:
 *  - the root volume is fetched with get_volume(RootVolNo) and rootmap
 *    is the vol0_blockmap entry for zone.
 *  - bytes is rounded up to a multiple of 8, then a boundary test
 *    compares the first and last byte of the request and, when it
 *    fires, advances rootmap->phys_offset and marks the volume modified.
 *  - a first index, taken from alloc_offset shifted by
 *    HAMMER_LARGEBLOCK_BITS + HAMMER_BLOCKMAP_BITS, selects an entry
 *    under rootmap->phys_offset; when alloc_offset is on a
 *    HAMMER_LARGEBLOCK_LAYER1_MASK boundary that entry is zeroed and
 *    given a new big-block.
 *  - a second index, taken from alloc_offset shifted by
 *    HAMMER_LARGEBLOCK_BITS, selects an entry under the first; when
 *    alloc_offset is on a HAMMER_LARGEBLOCK_MASK64 boundary it is
 *    zeroed, given a new big-block and bytes_free is set to
 *    HAMMER_LARGEBLOCK_SIZE.
 *  - bytes_free is reduced by bytes, *result_offp receives the
 *    alloc_offset in effect before it is advanced by bytes, and the
 *    data pointer is fetched with get_buffer_data().
 *
 * NOTE(review): many lines are absent from this extract (local
 * declarations, the masks completing two expressions, the assignments
 * of the local buffer pointer, and the return) -- confirm against the
 * full file before relying on this summary.
 * NOTE(review): the boundary adjustment and the final buffer index are
 * computed from rootmap->phys_offset while the allocation itself
 * advances rootmap->alloc_offset -- confirm this is intended.
 */
337 alloc_blockmap(int zone
, int bytes
, hammer_off_t
*result_offp
,
338 struct buffer_info
**bufferp
)
340 struct buffer_info
*buffer
;
341 struct volume_info
*volume
;
342 hammer_blockmap_entry_t rootmap
;
343 hammer_blockmap_entry_t blockmap
;
347 volume
= get_volume(RootVolNo
);
349 rootmap
= &volume
->ondisk
->vol0_blockmap
[zone
];
352 * Alignment and buffer-boundary issues
354 bytes
= (bytes
+ 7) & ~7;
355 if ((rootmap
->phys_offset
^ (rootmap
->phys_offset
+ bytes
- 1)) &
357 volume
->cache
.modified
= 1;
358 rootmap
->phys_offset
= (rootmap
->phys_offset
+ bytes
) &
365 i
= (rootmap
->alloc_offset
>> (HAMMER_LARGEBLOCK_BITS
+
366 HAMMER_BLOCKMAP_BITS
)) & HAMMER_BLOCKMAP_RADIX_MASK
;
368 blockmap
= get_buffer_data(rootmap
->phys_offset
+ i
* sizeof(*blockmap
),
371 if ((rootmap
->alloc_offset
& HAMMER_LARGEBLOCK_LAYER1_MASK
) == 0) {
372 buffer
->cache
.modified
= 1;
373 bzero(blockmap
, sizeof(*blockmap
));
374 blockmap
->phys_offset
= alloc_bigblock();
380 i
= (rootmap
->alloc_offset
>> HAMMER_LARGEBLOCK_BITS
) &
381 HAMMER_BLOCKMAP_RADIX_MASK
;
383 blockmap
= get_buffer_data(
384 blockmap
->phys_offset
+ i
* sizeof(*blockmap
), bufferp
, 0);
387 if ((rootmap
->alloc_offset
& HAMMER_LARGEBLOCK_MASK64
) == 0) {
388 buffer
->cache
.modified
= 1;
389 bzero(blockmap
, sizeof(*blockmap
));
390 blockmap
->phys_offset
= alloc_bigblock();
391 blockmap
->bytes_free
= HAMMER_LARGEBLOCK_SIZE
;
394 buffer
->cache
.modified
= 1;
395 volume
->cache
.modified
= 1;
396 blockmap
->bytes_free
-= bytes
;
397 *result_offp
= rootmap
->alloc_offset
;
398 rootmap
->alloc_offset
+= bytes
;
400 i
= (rootmap
->phys_offset
>> HAMMER_BUFFER_BITS
) &
401 HAMMER_BUFFERS_PER_LARGEBLOCK_MASK
;
402 ptr
= get_buffer_data(
403 blockmap
->phys_offset
+ i
* HAMMER_BUFSIZE
+
404 ((int32_t)*result_offp
& HAMMER_BUFMASK
), bufferp
, 0);
405 buffer
->cache
.modified
= 1;
/*
 * Body of the big-block allocator (presumably alloc_bigblock(), going
 * by the panic message and the forward declaration; the function header
 * lines are absent from this extract).
 *
 * The current vol0_free_off of the root volume is the result;
 * vol0_free_off is then advanced by HAMMER_LARGEBLOCK_SIZE.  If the
 * short-offset part of the new free offset exceeds the size of the
 * buffer area (vol_buf_end - vol_buf_beg), panic() reports that the
 * filesystem is too small.
 *
 * NOTE(review): lines between the panic() call and the return are
 * absent from this extract, so it cannot be seen here whether the
 * volume is marked modified or released -- confirm against the full
 * file.
 */
415 struct volume_info
*volume
;
416 hammer_off_t result_offset
;
418 volume
= get_volume(RootVolNo
);
419 result_offset
= volume
->ondisk
->vol0_free_off
;
420 volume
->ondisk
->vol0_free_off
+= HAMMER_LARGEBLOCK_SIZE
;
421 if ((volume
->ondisk
->vol0_free_off
& HAMMER_OFF_SHORT_MASK
) >
422 (hammer_off_t
)(volume
->ondisk
->vol_buf_end
- volume
->ondisk
->vol_buf_beg
)) {
423 panic("alloc_bigblock: Ran out of room, filesystem too small");
426 return(result_offset
);
431 * Reserve space from the FIFO. Make sure that bytes does not cross a
434 * Zero out base_bytes and initialize the fifo head and tail. The
435 * data area is not zerod.
/*
 * hammer_alloc_fifo: reserve one FIFO element at the end of the FIFO of
 * the root volume and initialize its head and tail.
 *
 * Visible steps:
 *  - aligned_bytes is base_bytes + ext_bytes + HAMMER_TAIL_ONDISK_SIZE
 *    rounded up using HAMMER_HEAD_ALIGN_MASK.
 *  - off is the current vol0_fifo_end; an assert requires that off and
 *    off + aligned_bytes fall in the same buffer.
 *  - the buffer at off is acquired with get_buffer() and stored in
 *    *bufp; the buffer and the volume are marked modified.
 *  - the first base_bytes at the element are zeroed, then the head
 *    signature, type, size and sequence number are filled in
 *    (vol0_next_seq is post-incremented).
 *  - the tail, located HAMMER_TAIL_ONDISK_SIZE bytes before the end of
 *    the element, receives its signature, type and size.
 *  - vol0_fifo_end is advanced by aligned_bytes and the volume is
 *    marked modified again.
 *
 * NOTE(review): the declaration of off, a few interior lines and the
 * return statement are absent from this extract; given the return type
 * in the forward declaration the result is presumably off -- confirm
 * against the full file.
 */
439 hammer_alloc_fifo(int32_t base_bytes
, int32_t ext_bytes
,
440 struct buffer_info
**bufp
, u_int16_t hdr_type
)
442 struct buffer_info
*buf
;
443 struct volume_info
*volume
;
444 hammer_fifo_head_t head
;
445 hammer_fifo_tail_t tail
;
447 int32_t aligned_bytes
;
449 aligned_bytes
= (base_bytes
+ ext_bytes
+ HAMMER_TAIL_ONDISK_SIZE
+
450 HAMMER_HEAD_ALIGN_MASK
) & ~HAMMER_HEAD_ALIGN_MASK
;
452 volume
= get_volume(RootVolNo
);
453 off
= volume
->ondisk
->vol0_fifo_end
;
456 * For now don't deal with transitions across buffer boundaries,
457 * only newfs_hammer uses this function.
459 assert((off
& ~HAMMER_BUFMASK64
) ==
460 ((off
+ aligned_bytes
) & ~HAMMER_BUFMASK
));
462 *bufp
= buf
= get_buffer(off
, 0);
464 buf
->cache
.modified
= 1;
465 volume
->cache
.modified
= 1;
467 head
= (void *)((char *)buf
->ondisk
+ ((int32_t)off
& HAMMER_BUFMASK
));
468 bzero(head
, base_bytes
);
470 head
->hdr_signature
= HAMMER_HEAD_SIGNATURE
;
471 head
->hdr_type
= hdr_type
;
472 head
->hdr_size
= aligned_bytes
;
473 head
->hdr_seq
= volume
->ondisk
->vol0_next_seq
++;
475 tail
= (void*)((char *)head
+ aligned_bytes
- HAMMER_TAIL_ONDISK_SIZE
);
476 tail
->tail_signature
= HAMMER_TAIL_SIGNATURE
;
477 tail
->tail_type
= hdr_type
;
478 tail
->tail_size
= aligned_bytes
;
480 volume
->ondisk
->vol0_fifo_end
+= aligned_bytes
;
481 volume
->cache
.modified
= 1;
491 * Flush various tracking structures to disk
495 * Flush various tracking structures to disk
/*
 * flush_all_volumes: iterate over every volume on VolList.
 *
 * NOTE(review): the loop body is absent from this extract; presumably
 * each volume is passed to flush_volume() -- confirm against the full
 * file.
 */
498 flush_all_volumes(void)
500 struct volume_info
*vol
;
502 TAILQ_FOREACH(vol
, &VolList
, entry
)
507 flush_volume(struct volume_info
*volume
)
509 struct buffer_info
*buffer
;
511 TAILQ_FOREACH(buffer
, &volume
->buffer_list
, entry
)
512 flush_buffer(buffer
);
513 writehammerbuf(volume
, volume
->ondisk
, 0);
514 volume
->cache
.modified
= 0;
518 flush_buffer(struct buffer_info
*buffer
)
520 writehammerbuf(buffer
->volume
, buffer
->ondisk
, buffer
->buf_disk_offset
);
521 buffer
->cache
.modified
= 0;
526 * Generic buffer initialization
/*
 * init_fifo_head: stamp a FIFO head with HAMMER_HEAD_SIGNATURE and the
 * given header type.
 *
 * NOTE(review): the lines following these two assignments are absent
 * from this extract, so any further fields initialized by the full
 * function are not described here -- confirm against the full file.
 */
529 init_fifo_head(hammer_fifo_head_t head
, u_int16_t hdr_type
)
531 head
->hdr_signature
= HAMMER_HEAD_SIGNATURE
;
532 head
->hdr_type
= hdr_type
;
542 * Core I/O operations
545 readhammerbuf(struct volume_info
*vol
, void *data
, int64_t offset
)
549 n
= pread(vol
->fd
, data
, HAMMER_BUFSIZE
, offset
);
550 if (n
!= HAMMER_BUFSIZE
)
551 err(1, "Read volume %d (%s)", vol
->vol_no
, vol
->name
);
557 writehammerbuf(struct volume_info
*vol
, const void *data
, int64_t offset
)
561 n
= pwrite(vol
->fd
, data
, HAMMER_BUFSIZE
, offset
);
562 if (n
!= HAMMER_BUFSIZE
)
563 err(1, "Write volume %d (%s)", vol
->vol_no
, vol
->name
);
567 panic(const char *ctl
, ...)
572 vfprintf(stderr
, ctl
, va
);
574 fprintf(stderr
, "\n");