nrelease - fix/improve livecd
[dragonfly.git] / sys / vfs / hammer / hammer_btree.h
blobca0b7e0c758a336c5ff11e9d798c9f0477558a77
1 /*
2 * Copyright (c) 2007 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
34 * $DragonFly: src/sys/vfs/hammer/hammer_btree.h,v 1.24 2008/06/26 04:06:22 dillon Exp $
37 #ifndef VFS_HAMMER_BTREE_H_
38 #define VFS_HAMMER_BTREE_H_
41 * HAMMER B-Tree index
43 * HAMMER implements a modified B+Tree. B+Trees store records only
44 * at their leaves and HAMMER's modification is to adjust the internal
45 * elements so there is a boundary element on each side instead of sub-tree
46 * pointers.
48 * We just call our modified B+Tree a 'B-Tree' in HAMMER documentation to
49 * reduce confusion.
51 * A B-Tree internal node looks like this:
53 * B N N N N N N B <-- boundary and internal elements
54 * S S S S S S S <-- subtree pointers
56 * A B-Tree leaf node looks like this:
58 * L L L L L L L L <-- leaf elemenets
59 * (there is also a previous and next-leaf pointer)
61 * The recursion radix of an internal node is reduced by 1 relative to
62 * a normal B-Tree in order to accomodate the right-hand boundary.
63 * The left-hand boundary (B in the left) is integrated into the first
64 * element so it doesn't require 2 elements to accomodate boundaries.
66 * The big benefit to using a B-Tree with built-in bounds information is
67 * that it makes it possible to cache pointers into the middle of the tree
68 * and not have to start searches, insertions, OR deletions at the root node.
69 * The boundary elements allow searches to progress in a definitive direction
70 * from any point in the tree without revisting nodes. It is also possible
71 * to terminate searches early and make minor adjustments to the boundaries
72 * (within the confines of the parent's boundaries) on the fly. This greatly
73 * improves the efficiency of many operations.
75 * All of the structures below are on-disk structures.
79 * Common base for all B-Tree element types (40 bytes)
81 * btype field represents a type of B-Tree ondisk structure that this
82 * B-Tree element points to, but not a type of B-Tree node that this
83 * B-Tree element is a part of. btype could be HAMMER_BTREE_TYPE_RECORD
84 * as well as HAMMER_BTREE_TYPE_INTERNAL and HAMMER_BTREE_TYPE_LEAF,
85 * while B-Tree node type is never HAMMER_BTREE_TYPE_RECORD.
87 * The following fields are keys used by hammer_btree_cmp() to compare
88 * B-Tree elements listed from higher to lower priority on comparison.
89 * B-Tree elements are first grouped by localization value, and then
90 * obj_id within a subtree of the same localization value, and so on.
92 * 1. localization
93 * 2. obj_id
94 * 3. rec_type
95 * 4. key
96 * 5. create_tid
98 typedef struct hammer_base_elm {
99 int64_t obj_id; /* 00 object record is associated with */
100 int64_t key; /* 08 indexing key (offset or namekey) */
102 hammer_tid_t create_tid; /* 10 transaction id for record creation */
103 hammer_tid_t delete_tid; /* 18 transaction id for record update/del */
105 uint16_t rec_type; /* 20 HAMMER_RECTYPE_ */
106 uint8_t obj_type; /* 22 HAMMER_OBJTYPE_ */
107 uint8_t btype; /* 23 B-Tree element type */
108 uint32_t localization; /* 24 B-Tree localization parameter */
109 /* 28 */
110 } *hammer_base_elm_t;
113 * Localization has sorting priority over the obj_id,rec_type,key,tid
114 * and is used to localize inodes for very fast directory scans.
116 * Localization can also be used to create pseudo-filesystems within
117 * a HAMMER filesystem. Pseudo-filesystems would be suitable
118 * replication targets.
120 * Upper 16 bits of the localization field in struct hammer_base_elm
121 * represents pseudo-filesystem id ranging from default 0 to 65535,
122 * and lower 16 bits represents its localization type in bitfield
123 * where 0x1 means the element is for inode and 0x2 means the element
124 * is for anything other than inode. Note that 0x3 (0x1|0x2) is not
125 * a valid type, while 0 and 0xFFFF are valid types for some cases.
127 * The root inode (not the PFS root inode but the real root) uses
128 * HAMMER_DEF_LOCALIZATION for its incore ip->obj_localization.
129 * HAMMER_DEF_LOCALIZATION implies PFS#0 and no localization type.
131 #define HAMMER_LOCALIZE_INODE 0x00000001
132 #define HAMMER_LOCALIZE_MISC 0x00000002 /* not inode */
133 #define HAMMER_LOCALIZE_MASK 0x0000FFFF
134 #define HAMMER_LOCALIZE_PSEUDOFS_MASK 0xFFFF0000
136 #define HAMMER_MIN_LOCALIZATION 0x00000000U
137 #define HAMMER_MAX_LOCALIZATION 0x0000FFFFU
138 #define HAMMER_DEF_LOCALIZATION 0x00000000U
140 #define HAMMER_MIN_ONDISK_LOCALIZATION \
141 HAMMER_MIN_LOCALIZATION
142 #define HAMMER_MAX_ONDISK_LOCALIZATION \
143 (HAMMER_MAX_LOCALIZATION | HAMMER_LOCALIZE_PSEUDOFS_MASK)
145 #define lo_to_pfs(lo) \
146 ((int)(((lo) & HAMMER_LOCALIZE_PSEUDOFS_MASK) >> 16))
147 #define pfs_to_lo(pfs) \
148 ((((uint32_t)(pfs)) << 16) & HAMMER_LOCALIZE_PSEUDOFS_MASK)
151 * Internal element (40 + 24 = 64 bytes).
153 * An internal element contains a recursion to another B-Tree node.
155 typedef struct hammer_btree_internal_elm {
156 struct hammer_base_elm base;
157 hammer_tid_t mirror_tid; /* mirroring support */
158 hammer_off_t subtree_offset;
159 int32_t reserved01;
160 int32_t reserved02;
161 } *hammer_btree_internal_elm_t;
164 * Leaf B-Tree element (40 + 24 = 64 bytes).
166 * NOTE: create_ts/delete_ts are not used by any core algorithms, they are
167 * used only by userland to present nominal real-time date strings
168 * to the user.
170 typedef struct hammer_btree_leaf_elm {
171 struct hammer_base_elm base;
172 uint32_t create_ts;
173 uint32_t delete_ts;
174 hammer_off_t data_offset;
175 int32_t data_len;
176 hammer_crc_t data_crc;
177 } *hammer_btree_leaf_elm_t;
180 * Rollup btree leaf element types - 64 byte structure
182 typedef union hammer_btree_elm {
183 struct hammer_base_elm base;
184 struct hammer_btree_leaf_elm leaf;
185 struct hammer_btree_internal_elm internal;
186 } *hammer_btree_elm_t;
189 * B-Tree node (64x64 = 4K structure)
191 * Each node contains 63 elements. The last element for an internal node
192 * is the right-boundary so internal nodes have one fewer logical elements
193 * then leaf nodes.
195 * 'count' always refers to the number of elements and is non-inclusive of
196 * the right-hand boundary for an internal node. For a leaf node, 'count'
197 * refers to the number of elements and there is no idea of boundaries.
199 * The use of a fairly large radix is designed to reduce the number of
200 * discrete disk accesses required to locate something. Keep in mind
201 * that nodes are allocated out of 16K hammer buffers so supported values
202 * are (256-1), (128-1), (64-1), (32-1), or (16-1). HAMMER uses 63-way
203 * so the node size is (64x(1+(64-1))) = 4KB.
205 * NOTE: FUTURE EXPANSION: The reserved fields in hammer_node_ondisk are
206 * reserved for left/right leaf linkage fields, flags, and other future
207 * features.
209 #define HAMMER_BTREE_TYPE_INTERNAL ((uint8_t)'I')
210 #define HAMMER_BTREE_TYPE_LEAF ((uint8_t)'L')
211 #define HAMMER_BTREE_TYPE_RECORD ((uint8_t)'R')
212 #define HAMMER_BTREE_TYPE_DELETED ((uint8_t)'D')
213 #define HAMMER_BTREE_TYPE_NONE ((uint8_t)0)
215 #define HAMMER_BTREE_LEAF_ELMS 63
216 #define HAMMER_BTREE_INT_ELMS (HAMMER_BTREE_LEAF_ELMS - 1)
218 typedef struct hammer_node_ondisk {
220 * B-Tree node header (64 bytes)
222 hammer_crc_t crc; /* MUST BE FIRST FIELD OF STRUCTURE */
223 uint32_t reserved01;
224 hammer_off_t parent; /* 0 if at root of B-Tree */
225 int32_t count; /* maximum 62 for INTERNAL, 63 for LEAF */
226 uint8_t type; /* B-Tree node type (INTERNAL or LEAF) */
227 uint8_t reserved02;
228 uint16_t reserved03;
229 hammer_off_t reserved04;
230 hammer_off_t reserved05;
231 hammer_off_t reserved06;
232 hammer_off_t reserved07;
233 hammer_tid_t mirror_tid; /* mirroring support (aggregator) */
236 * B-Tree node element array (64x63 bytes)
238 * Internal nodes have one less logical element
239 * (meaning: the same number of physical elements) in order to
240 * accomodate the right-hand boundary. The left-hand boundary
241 * is integrated into the first element. Leaf nodes have no
242 * boundary elements.
244 union hammer_btree_elm elms[HAMMER_BTREE_LEAF_ELMS];
245 } *hammer_node_ondisk_t;
247 #define HAMMER_BTREE_CRCSIZE \
248 (sizeof(struct hammer_node_ondisk) - sizeof(hammer_crc_t))
251 * Return 1 if elm is a node element of an internal node,
252 * otherwise return 0.
254 static __inline
256 hammer_is_internal_node_elm(hammer_btree_elm_t elm)
258 switch (elm->base.btype) {
259 case HAMMER_BTREE_TYPE_INTERNAL:
260 case HAMMER_BTREE_TYPE_LEAF:
261 return(1);
263 return(0);
267 * Return 1 if elm is a node element of a leaf node,
268 * otherwise return 0.
270 static __inline
272 hammer_is_leaf_node_elm(hammer_btree_elm_t elm)
274 switch (elm->base.btype) {
275 case HAMMER_BTREE_TYPE_RECORD:
276 return(1);
278 return(0);
281 static __inline
283 hammer_node_max_elements(uint8_t type)
285 switch (type) {
286 case HAMMER_BTREE_TYPE_LEAF:
287 return(HAMMER_BTREE_LEAF_ELMS);
288 case HAMMER_BTREE_TYPE_INTERNAL:
289 return(HAMMER_BTREE_INT_ELMS);
291 return(-1); /* invalid type */
294 static __inline
295 char
296 hammer_elm_btype(hammer_btree_elm_t elm)
298 switch(elm->base.btype) {
299 case HAMMER_BTREE_TYPE_INTERNAL:
300 case HAMMER_BTREE_TYPE_LEAF:
301 case HAMMER_BTREE_TYPE_RECORD:
302 case HAMMER_BTREE_TYPE_DELETED:
303 return(elm->base.btype); /* ascii */
304 case HAMMER_BTREE_TYPE_NONE:
305 return('*');
306 default:
307 return('?');
311 #endif /* !VFS_HAMMER_BTREE_H_ */