4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
26 #ifndef _SYS_MD_MDDB_H
27 #define _SYS_MD_MDDB_H
29 #pragma ident "%Z%%M% %I% %E% SMI"
31 #include <sys/types.h>
38 #if 0 /* DRP FOR DEBUGGING */
/*
 * MD_PRV_* record state bits. Each MD_PRV_PEND* value is MD_PRV_GOTIT
 * combined with exactly one pending-action bit.
 * NOTE(review): presumably these are the values stored and read via
 * mddb_setrecprivate()/mddb_getrecprivate() -- confirm against callers.
 */
43 #define MD_PRV_GOTIT 0x0001 /* Been snarfed */
44 #define MD_PRV_DELETE 0x0002 /* Record pending to be deleted */
45 #define MD_PRV_COMMIT 0x0004 /* Record pending to be committed */
46 #define MD_PRV_CLEANUP 0x0008 /* Record pending to be cleaned up */
47 #define MD_PRV_CONVD 0x0010 /* Record has been converted (32->64) */
48 #define MD_PRV_PENDDEL (MD_PRV_GOTIT | MD_PRV_DELETE)
49 #define MD_PRV_PENDCOM (MD_PRV_GOTIT | MD_PRV_COMMIT)
50 #define MD_PRV_PENDCLEAN (MD_PRV_GOTIT | MD_PRV_CLEANUP)
/* MDDB_E_* status codes; every value in this list is negative. */
53 #define MDDB_E_INVALID (-1) /* an invalid argument was passed */
54 #define MDDB_E_EXISTS (-2) /* doing an operation a 2nd time which can */
55 /* only be done once */
56 #define MDDB_E_MASTER (-3) /* problem occurred accessing master block */
57 /* returned from NEW_DEV */
58 #define MDDB_E_TOOSMALL (-4) /* device is not large enough */
59 #define MDDB_E_NORECORD (-5) /* record does not exist */
61 * returned from: mddb_getnextrec
67 #define MDDB_E_NOSPACE (-6) /* no space to create record */
68 #define MDDB_E_NOTNOW (-7) /* do not presently have enough resources */
69 /* to perform requested operation */
70 #define MDDB_E_NODB (-8) /* no database exists */
71 #define MDDB_E_NOTOWNER (-9) /* have not been told to grab this set */
72 #define MDDB_E_STALE (-10) /* database is stale */
73 #define MDDB_E_TOOFEW (-11) /* not enough replicas available */
74 #define MDDB_E_TAGDATA (-12) /* tagged data detected */
75 #define MDDB_E_ACCOK (-13) /* 50/50 mode */
76 #define MDDB_E_NTAGDATA (-14) /* tagop try, no tag data */
77 #define MDDB_E_ACCNOTOK (-15) /* accop try, no accept possible */
78 #define MDDB_E_NOLOCBLK (-16) /* No valid locators found */
79 #define MDDB_E_NOLOCNMS (-17) /* No valid locator name information */
80 #define MDDB_E_NODIRBLK (-18) /* No directory blocks found */
81 #define MDDB_E_NOTAGREC (-19) /* No tag record blocks found */
82 #define MDDB_E_NOTAG (-20) /* No matching tag record found */
83 #define MDDB_E_NODEVID (-21) /* No device id found */
85 #define MDDB_MINBLKS 16 /* enough for a few metadevices */
86 #define MDDB_MAXBLKS 8192 /* size of free bit map (must be / 8) */
87 #define MDDB_MN_MINBLKS 32768 /* Multinode metadb minimum size */
89 #define MDDB_MN_MAXBLKS 524288 /* size of free bit map (must be / 8) */
92 #define MDDB_C_STALE 0x0001
93 #define MDDB_C_TOOFEW 0x0002
94 #define MDDB_C_NOTOWNER 0x0004
95 #define MDDB_C_SET_MN_STALE 0x0008 /* Set MN set to stale */
96 #define MDDB_C_IMPORT 0x0010
99 * Defines used to set/reset new master flag in set structure.
100 * Used during reconfig cycle to determine quickly if there is
101 * new master for the set.
103 #define MDDB_NM_SET 0x0001
104 #define MDDB_NM_RESET 0x0002
105 #define MDDB_NM_GET 0x0004
107 /* Definitions of flag in Locator Block Device ID data area - mddb_did_info */
108 #define MDDB_DID_EXISTS 0x0001 /* Device ID exists */
109 #define MDDB_DID_VALID 0x0002 /* Device ID valid on current system */
110 #define MDDB_DID_UPDATED 0x0004 /* locator/sidelocator info updated */
112 /* Definitions of flag in Locator Block - mddb_lb */
113 #define MDDB_DEVID_STYLE 0x0001 /* Locator Block in Device ID format */
114 #define MDDB_MNSET 0x0002 /* MDDB is for a multi-node set */
117 #define MDDB_MAX_PATCH 25 /* number of locations that */
118 /* can be patched in etc/system */
121 * Set struct used by all parts of the driver, to store anchor pointers.
123 * Lock associated with field in this structure:
125 * Some of the fields are accessible by both the single threaded ioctl thread
126 * and internal threads such as resync, hotsparing, etc. In this case
127 * additional protection is needed. For example, s_db is additionally
128 * protected by s_dbmx; s_un and s_ui are protected by md_unit_array_rw.lock;
129 * s_nm, s_nmid, s_did_nm, s_did_nmid and s_dtp are protected by nm_lock.
130 * The remaining fields are protected by md_mx. Two fields s_un_next and
131 * s_un_avail are introduced by the friendly name project and are ONLY
132 * accessible via a single threaded ioctl thread which already is protected
133 * by the ioctl lock and there is no need to add extra protection to them.
134 * However, in the future if they become accessible by other internal threads
135 * then an additional protection such as md_mx lock is highly recommended.
138 typedef struct md_set
{
139 uint_t s_status
; /* set status */
140 void **s_ui
; /* set unit incore anchor */
141 void **s_un
; /* set unit anchor */
142 void *s_hsp
; /* set Hot Spare Pool anchor */
143 void *s_hs
; /* set Hot Spare anchor */
144 void *s_db
; /* set MDDB anchor */
145 kmutex_t s_dbmx
; /* set MDDB mutex */
146 void *s_nm
; /* set namespace anchor */
147 mddb_recid_t s_nmid
; /* set namespace anchor record */
148 void *s_did_nm
; /* set device id namespace anchor */
149 mddb_recid_t s_did_nmid
; /* set device id namespace anchor rec */
150 void *s_dtp
; /* set data tag rec */
151 int s_am_i_master
; /* incore master flag for this node */
152 md_mn_nodeid_t s_nodeid
; /* nodeid of this node - for MN sets */
153 uint_t s_rcnt
; /* incore resync count for set */
154 unit_t s_un_next
; /* s_un scan starts here */
155 unit_t s_un_avail
; /* number of avail slots */
159 #define MDDB_MAGIC_MB 0x6d646d62 /* magic number for master blocks */
160 #define MDDB_MAGIC_DB 0x6d646462 /* magic number for directory blocks */
161 #define MDDB_MAGIC_RB 0x6d647262 /* magic number for record blocks */
162 #define MDDB_MAGIC_LB 0x6d646c62 /* magic number for locator blocks */
163 #define MDDB_MAGIC_LN 0x6d646c6e /* magic number for locator names */
164 #define MDDB_MAGIC_DT 0x6d646474 /* magic number for data tag */
165 #define MDDB_MAGIC_DI 0x6d646469 /* magic number for device ID block */
166 #define MDDB_MAGIC_DU 0x6d646475 /* magic num for dummy mb */
167 #define MDDB_MAGIC_DE 0x6d646465 /* magic num for mb devid */
169 #define MDDB_GLOBAL_XOR 1234567890
171 #define MDDB_REV_MAJOR (uint_t)0xff00
172 #define MDDB_REV_MINOR (uint_t)0x00ff
176 * If a MN diskset, master block revision is set to MDDB_REV_MNMB.
177 * Even though the master block structure is no different
178 * for a MN set, setting the revision field to a different
179 * number keeps any pre-MN_diskset code from accessing
180 * this diskset. It also allows for an early determination
181 * of a MN diskset when reading in from disk so that the
182 * proper size locator block and locator names structure
183 * can be read in thus saving time on diskset startup.
184 * Since no change in master block structure, the MDDB_REV_MINOR
185 * portion of the revision was incremented.
188 * If a MN diskset, the locator block structure is a different size in
189 * order to accommodate up to MD_MNMAXSIDES nodes in a diskset
190 * with any nodeid (sideno) allowed.
191 * The revision is set to MDDB_REV_MNLB which is a change of the
192 * MDDB_REV_MAJOR portion of the revision.
195 * If a MN diskset, the locator names is a different size in
196 * order to accommodate up to MD_MNMAXSIDES nodes in a diskset
197 * with any nodeid (sideno) allowed.
198 * The revision is set to MDDB_REV_MNLN which is a change of the
199 * MDDB_REV_MAJOR portion of the revision.
201 * The record blocks have two binary properties. A record block can
202 * represent either a 32 or 64 bit unit. A record block can also represent
203 * a traditionally named unit or a friendly named unit. Thus, there are
204 * minor revisions of record block.
206 * Traditional Friendly
208 * ----------- --------
209 * 32 bit MDDB_REV_RB MDDB_REV_RBFN
210 * 64 bit MDDB_REV_RB64 MDDB_REV_RB64FN
213 #define MDDB_REV_MB (uint_t)0x0201
214 #define MDDB_REV_MNMB (uint_t)0x0202
215 #define MDDB_REV_DB (uint_t)0x0201
216 #define MDDB_REV_LB (uint_t)0x0500
217 #define MDDB_REV_MNLB (uint_t)0x0600
218 #define MDDB_REV_LN (uint_t)0x0100
219 #define MDDB_REV_MNLN (uint_t)0x0300
220 #define MDDB_REV_RB (uint_t)0x0200
221 #define MDDB_REV_RB64 (uint_t)0x0201
222 #define MDDB_REV_RBFN (uint_t)0x0202
223 #define MDDB_REV_RB64FN (uint_t)0x0203
224 #define MDDB_REV_DT (uint_t)0x0100
225 #define MDDB_REV_DI (uint_t)0x0100
228 * Transfer record block friendly name status to unit/hs structure.
230 #define MDDB_NOTE_FN(rbv, unv) switch (rbv) { \
232 case MDDB_REV_RB64: \
233 unv &= ~MD_FN_META_DEV; \
235 case MDDB_REV_RBFN: \
236 case MDDB_REV_RB64FN: \
237 unv |= MD_FN_META_DEV; \
241 #define MDDB_BSIZE (uint_t)DEV_BSIZE
242 #define MDDB_PREFIXCNT 10
243 #define MDDB_DRVNMCNT 10
245 typedef int mddb_block_t
;
247 #if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4
250 typedef struct md_mnname_suffix
{
251 md_name_suffix mn_ln_suffix
;
253 } md_mnname_suffix_t
;
255 typedef struct mddb_ln
{
259 struct timeval32 ln_timestamp
;
260 md_name_prefix ln_prefixes
[MDDB_PREFIXCNT
];
261 /* Don't change array sizes without changing RNDUP_BLKCNT */
262 md_name_suffix ln_suffixes
[MD_MAXSIDES
][MDDB_NLB
];
266 * Locator name structure for MN diskset. Same as for traditional
267 * and local diskset except that more sides are supported and the
268 * side number can be any number since the side number is stored
269 * in the ln_mnsuffixes structure instead of being used as an index
270 * into that array. This means that the whole array may need to be
271 * searched in order to find the correct information given a side number.
273 typedef struct mddb_mnln
{
277 struct timeval32 ln_timestamp
;
278 md_name_prefix ln_prefixes
[MDDB_PREFIXCNT
];
279 /* Don't change array sizes without changing MDDB_MNLNCNT */
280 md_mnname_suffix_t ln_mnsuffixes
[MD_MNMAXSIDES
][MDDB_NLB
];
283 #define RNDUP_BLKCNT(sz, delta) (((sz) - \
285 ((MD_MAXSIDES - 1) * MDDB_NLB)) + \
286 MDDB_BSIZE - 1) / MDDB_BSIZE)
287 #define MDDB_LNCNT RNDUP_BLKCNT(sizeof (mddb_ln_t), 0)
288 #define MDDB_LOCAL_LNCNT RNDUP_BLKCNT(sizeof (mddb_ln_t), \
289 sizeof (md_name_suffix))
291 #define MDDB_MNLNCNT ((sizeof (mddb_mnln_t) + (MDDB_BSIZE - 1)) \
294 typedef struct mddb_dt
{
301 #define MDDB_DT_BYTES (roundup(sizeof (mddb_dt_t), MDDB_BSIZE))
302 #define MDDB_DT_BLOCKS (btodb(MDDB_DT_BYTES))
304 typedef union identifier
{
305 char serial
[MDDB_SN_LEN
];
306 struct timeval32 createtime
;
309 typedef struct mddb_locator
{
315 typedef struct mddb_sidelocator
{
316 uchar_t l_drvnm_index
;
318 } mddb_sidelocator_t
;
320 typedef struct mddb_mnsidelocator
{
321 uchar_t mnl_drvnm_index
;
324 } mddb_mnsidelocator_t
;
326 typedef struct mddb_drvnm
{
328 char dn_data
[MD_MAXDRVNM
];
332 * Locator Block Device ID Information
333 * Several device id's may share one disk block in an effort to
334 * conserve used replica space.
336 typedef struct mddb_did_info
{
337 uint_t info_flags
; /* MDDB Device ID flags */
338 uint_t info_firstblk
; /* Device ID Start Block */
339 uint_t info_blkcnt
; /* Device ID Block Count */
340 uint_t info_offset
; /* Device ID offset w/i Block */
341 uint_t info_length
; /* Device ID Length */
342 uint_t info_checksum
; /* Device ID Checksum */
343 char info_minor_name
[32]; /* Minor name of lb dev */
346 typedef struct mddb_did_blk
{
347 int blk_magic
; /* used for verification */
348 uint_t blk_revision
; /* used for verification */
349 int blk_checksum
; /* used for verification */
350 uint_t blk_commitcnt
; /* matches LB's commitcnt */
351 mddb_did_info_t blk_info
[MDDB_NLB
];
353 #if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4
357 #define MDDB_DID_BYTES (roundup(sizeof (mddb_did_blk_t), MDDB_BSIZE))
358 #define MDDB_DID_BLOCKS (btodb(MDDB_DID_BYTES))
361 * Device ID Disk Blocks.
362 * Incore linked list of disk blocks containing device IDs.
363 * The list is built when reading in the mddb_did_blk structure and
364 * when reading in the actual disk blocks containing device ids.
365 * This list is used to easily write out all disk blocks containing
368 typedef struct mddb_did_db
{
369 uint_t db_firstblk
; /* Disk Block's logical addr */
370 uint_t db_blkcnt
; /* Contig Disk Block Count */
371 caddr_t db_ptr
; /* Ptr to incore Block(s) */
372 struct mddb_did_db
*db_next
; /* Ptr to next in list */
376 * Device ID Free List.
377 * Incore linked list of free space in disk blocks containing device IDs.
378 * Used to manage placement of device IDs in disk blocks.
379 * All disk blocks on free list are also in linked list of disk block
380 * containing device IDs (mddb_did_db_t).
382 typedef struct mddb_did_free
{
383 uint_t free_blk
; /* Disk Block's logical addr */
384 uint_t free_offset
; /* offset of free space */
385 uint_t free_length
; /* length of free space */
386 struct mddb_did_free
*free_next
; /* Ptr to next in list */
390 * Device ID Incore Area
391 * Contains pointer to Device ID Disk Block list and
392 * Device ID Free List.
393 * Also contains incore array of pointers to device IDs. Pointers
394 * point into the device ID Disk Block list and are used as a
395 * shortcut to find incore device IDs.
397 typedef struct mddb_did_ic
{
398 mddb_did_blk_t
*did_ic_blkp
;
399 mddb_did_db_t
*did_ic_dbp
;
400 mddb_did_free_t
*did_ic_freep
;
401 ddi_devid_t did_ic_devid
[MDDB_NLB
]; /* Ptr to device IDs */
405 * Locator Block (LB):
406 * - Are fixed size, but the size is different
407 * for local/shared set db replicas.
408 * - All LB's start at logical block 0.
409 * - After a replica quorum is found, there is
410 * only one incore copy of the LB.
411 * - LB's are only written when replicas are added, deleted, or errored.
412 * - LB's provide information about other replica's and their state.
414 #if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4
417 typedef struct mddb_lb
{
418 int lb_magic
; /* used for verification */
419 uint_t lb_revision
; /* used for verification */
420 int lb_checksum
; /* used for verification */
421 uint_t lb_commitcnt
; /* IMPORTANT */
422 struct timeval32 lb_timestamp
; /* informative only */
423 int lb_loccnt
; /* used for verification */
424 identifier_t lb_ident
; /* used for verification */
425 uint_t lb_flags
; /* flags describing LB */
426 uint_t lb_spare
[8]; /* Spare/Pad */
427 mddb_block_t lb_didfirstblk
; /* Devid Array Start Block */
428 mddb_block_t lb_didblkcnt
; /* Devid Array Number Blocks */
429 mddb_block_t lb_dtfirstblk
; /* Data Tag Start Block */
430 mddb_block_t lb_dtblkcnt
; /* Data Tag Number Block(s) */
431 struct timeval32 lb_inittime
; /* creation of database */
432 set_t lb_setno
; /* used for verification */
433 mddb_block_t lb_blkcnt
; /* used for verification */
434 mddb_block_t lb_lnfirstblk
;
435 mddb_block_t lb_lnblkcnt
;
436 mddb_block_t lb_dbfirstblk
;
437 mddb_drvnm_t lb_drvnm
[MDDB_DRVNMCNT
];
438 mddb_locator_t lb_locators
[MDDB_NLB
];
439 /* Don't change array sizes without changing RNDUP_BLKCNT */
440 mddb_sidelocator_t lb_sidelocators
[MD_MAXSIDES
][MDDB_NLB
];
442 #if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4
447 * Locator block structure for MN diskset. Same as for traditional
448 * and local diskset except that more sides are supported and the
449 * side number can be any number since the side number is stored
450 * in the lb_mnsidelocators structure instead of being used as an index
451 * into that array. This means that the whole array may need to be
452 * searched in order to find the correct information given a side number.
454 typedef struct mddb_mnlb
{
455 int lb_magic
; /* used for verification */
456 uint_t lb_revision
; /* used for verification */
457 int lb_checksum
; /* used for verification */
458 uint_t lb_commitcnt
; /* IMPORTANT */
459 struct timeval32 lb_timestamp
; /* informative only */
460 int lb_loccnt
; /* used for verification */
461 identifier_t lb_ident
; /* used for verification */
462 uint_t lb_flags
; /* flags describing LB */
463 uint_t lb_spare
[8]; /* Spare/Pad */
464 mddb_block_t lb_didfirstblk
; /* Devid Array Start Block */
465 mddb_block_t lb_didblkcnt
; /* Devid Array Number Blocks */
466 mddb_block_t lb_dtfirstblk
; /* Data Tag Start Block */
467 mddb_block_t lb_dtblkcnt
; /* Data Tag Number Block(s) */
468 struct timeval32 lb_inittime
; /* creation of database */
469 set_t lb_setno
; /* used for verification */
470 mddb_block_t lb_blkcnt
; /* used for verification */
471 mddb_block_t lb_lnfirstblk
;
472 mddb_block_t lb_lnblkcnt
;
473 mddb_block_t lb_dbfirstblk
;
474 mddb_drvnm_t lb_drvnm
[MDDB_DRVNMCNT
];
475 mddb_locator_t lb_locators
[MDDB_NLB
];
476 /* Don't change array sizes without changing MDDB_MNLBCNT */
477 mddb_mnsidelocator_t lb_mnsidelocators
[MD_MNMAXSIDES
][MDDB_NLB
];
481 #define MDDB_LBCNT RNDUP_BLKCNT(sizeof (mddb_lb_t), 0)
482 #define MDDB_LOCAL_LBCNT RNDUP_BLKCNT(sizeof (mddb_lb_t), \
483 sizeof (mddb_sidelocator_t))
485 #define MDDB_MNLBCNT ((sizeof (mddb_mnlb_t) + (MDDB_BSIZE - 1)) \
488 typedef struct mddb_map
{
489 daddr32_t m_consecutive
;
490 daddr32_t m_firstblk
;
494 * Master block(s) (MB)
495 * - Are written by userland; Never by the driver!
496 * - Each replica has its own master blocks,
497 * the master block(s) are not shared.
498 * - MB's are not in the logical block address space of the database.
499 * - MB's are a fixed size record (MDDB_BSIZE)
500 * - MB's provide the logical to physical block translation,
503 typedef struct mddb_mb
{
504 int mb_magic
; /* used for verification */
505 uint_t mb_revision
; /* used for verification */
506 uint_t mb_checksum
; /* used for verification */
508 uint32_t mb_next
; /* incore to next mb */
510 struct mddb_mb
*mb_next
; /* incore to next mb */
512 daddr32_t mb_nextblk
; /* block # for next mb */
513 md_timeval32_t mb_timestamp
; /* timestamp */
514 daddr32_t mb_blkcnt
; /* size of blkmap */
515 daddr32_t mb_blkno
; /* physical loc. for this MB */
516 set_t mb_setno
; /* used for verification */
517 struct timeval32 mb_setcreatetime
; /* set creation timestamp */
519 mddb_map_t mb_blkmap
; /* logical->physical blk map */
520 int mb_devid_magic
; /* verify devid in mb */
521 short mb_devid_len
; /* len of following devid */
522 char mb_devid
[1]; /* devid byte array */
526 * In-core version of mddb_mb. It is known that the mddb_mb is 512 bytes on
527 * disk, really, and so this structure is 512 + sizeof(struct mddb_mb_ic *)
529 #define MDDB_IC_BSIZE (MDDB_BSIZE + sizeof (struct mddb_mb_ic *))
530 typedef struct mddb_mb_ic
{
531 struct mddb_mb_ic
*mbi_next
;
532 struct mddb_mb mbi_mddb_mb
;
537 * there can be no address in record block. The checksum must
538 * stay the same where ever the record is in memory. Many
539 * things depend on this. Also the timestamp is the time the
540 * record was committed not the time it was written to a particular
543 * Old definition of mddb_rb, for 32-bit apps and libraries
545 typedef struct mddb_rb
{
549 uint_t rb_checksum_fiddle
;
554 struct timeval32 rb_timestamp
;
558 /* This is, and always will be, the on-disk version of mddb_rb */
559 typedef struct mddb_rb32
{
563 uint_t rb_checksum_fiddle
;
565 uint32_t rb_userdata
;
568 struct timeval32 rb_timestamp
;
575 typedef struct mddb_optinfo
{
580 /* Old definition of mddb_de, for 32-bit apps and libraries */
581 typedef struct mddb_de
{
582 struct mddb_de
*de_next
;
584 mddb_recid_t de_recid
;
585 mddb_type_t de_type1
;
589 mddb_block_t de_blkcount
;
591 mddb_optinfo_t de_optinfo
[2];
592 mddb_block_t de_blks
[1];
596 * In core version of mddb_de, includes pointer for mddb_rb32_t user data
597 * mddb_rb32_t is used incore
599 typedef struct mddb_de_ic
{
600 void *de_rb_userdata
;
601 void *de_rb_userdata_ic
;
602 uint_t de_owner_nodeid
;
603 struct mddb_de_ic
*de_next
;
605 mddb_recid_t de_recid
;
606 mddb_type_t de_type1
;
613 mddb_optinfo_t de_optinfo
[2];
614 mddb_block_t de_blks
[1];
617 typedef struct mddb_db
{
621 mddb_block_t db_blknum
;
622 struct mddb_db
*db_next
;
623 mddb_block_t db_nextblk
;
624 struct timeval32 db_timestamp
;
627 mddb_de_ic_t
*db_firstentry
;
629 mddb_de_t
*db_firstentry
;
634 * This is, and always will be, the on-disk version of mddb_de
635 * When mddb_de32 is read in it is converted into mddb_de_ic
637 typedef struct mddb_de32
{
640 mddb_recid_t de32_recid
;
641 mddb_type_t de32_type1
;
645 mddb_block_t de32_blkcount
;
647 mddb_optinfo_t de32_optinfo
[2];
648 mddb_block_t de32_blks
[1];
652 * This is, and always will be, the on-disk version of mddb_db
653 * When mddb_db32 is read in it is converted into mddb_db
654 * To minimize impact on mddb format mddb_db fields remain intact
656 typedef struct mddb_db32
{
658 uint_t db32_revision
;
659 uint_t db32_checksum
;
660 mddb_block_t db32_blknum
;
662 mddb_block_t db32_nextblk
;
663 struct timeval32 db32_timestamp
;
665 uint32_t db32_firstentry
;
668 #define de32tode(from, to) \
671 to->de_rb_userdata = NULL; \
672 to->de_owner_nodeid = MD_MN_INVALID_NID; \
673 to->de_next = (struct mddb_de_ic *)(uintptr_t)from->de32_next; \
674 to->de_rb = (mddb_rb32_t *)(uintptr_t)from->de32_rb; \
675 to->de_recid = from->de32_recid; \
676 to->de_type1 = from->de32_type1; \
677 to->de_type2 = from->de32_type2; \
678 to->de_reqsize = from->de32_reqsize; \
679 to->de_recsize = from->de32_recsize; \
680 to->de_blkcount = from->de32_blkcount; \
681 to->de_flags = from->de32_flags; \
682 to->de_optinfo[0] = from->de32_optinfo[0]; \
683 to->de_optinfo[1] = from->de32_optinfo[1]; \
684 for (i = 0; i < from->de32_blkcount; i++) \
685 to->de_blks[i] = from->de32_blks[i]; \
688 #define detode32(from, to) \
691 to->de32_next = (uint32_t)(uintptr_t)from->de_next; \
692 to->de32_rb = (uint32_t)(uintptr_t)from->de_rb; \
693 to->de32_recid = from->de_recid; \
694 to->de32_type1 = from->de_type1; \
695 to->de32_type2 = from->de_type2; \
696 to->de32_reqsize = from->de_reqsize; \
697 to->de32_recsize = from->de_recsize; \
698 to->de32_blkcount = from->de_blkcount; \
699 to->de32_flags = from->de_flags; \
700 to->de32_optinfo[0] = from->de_optinfo[0]; \
701 to->de32_optinfo[1] = from->de_optinfo[1]; \
702 for (i = 0; i < from->de_blkcount; i++) \
703 to->de32_blks[i] = from->de_blks[i]; \
/*
 * db32todb(from, to)
 *
 * Copy every field of the 32-bit (on-disk) directory block *from into the
 * directory block *to. The 32-bit db32_next and db32_firstentry values
 * are widened through uintptr_t before being stored as pointers.
 *
 *	from	pointer expression to the source mddb_db32-style block
 *	to	pointer expression to the destination mddb_db-style block
 *
 * Both parameters are parenthesized at every use so that any pointer-valued
 * expression (for example &blk or p + 1) can be passed safely.
 *
 * NOTE: the expansion is a sequence of statements, each already terminated
 * by ';', so it must not be used as the unbraced body of an if/else/loop.
 * Each argument is evaluated once per copied field; pass expressions
 * without side effects.
 */
#define	db32todb(from, to) \
	(to)->db_magic = (from)->db32_magic; \
	(to)->db_revision = (from)->db32_revision; \
	(to)->db_checksum = (from)->db32_checksum; \
	(to)->db_blknum = (from)->db32_blknum; \
	(to)->db_next = (struct mddb_db *)(uintptr_t)(from)->db32_next; \
	(to)->db_nextblk = (from)->db32_nextblk; \
	(to)->db_timestamp = (from)->db32_timestamp; \
	(to)->db_recsum = (from)->db32_recsum; \
	(to)->db_firstentry = \
	    (mddb_de_ic_t *)(uintptr_t)(from)->db32_firstentry;
/*
 * dbtodb32(from, to)
 *
 * Copy every field of the directory block *from into the 32-bit (on-disk)
 * directory block *to. The db_next and db_firstentry pointers are
 * converted through uintptr_t and truncated to uint32_t.
 *
 *	from	pointer expression to the source mddb_db-style block
 *	to	pointer expression to the destination mddb_db32-style block
 *
 * Both parameters are parenthesized at every use so that any pointer-valued
 * expression (for example &blk or p + 1) can be passed safely.
 *
 * NOTE: the expansion is a sequence of statements, each already terminated
 * by ';', so it must not be used as the unbraced body of an if/else/loop.
 * Each argument is evaluated once per copied field; pass expressions
 * without side effects.
 */
#define	dbtodb32(from, to) \
	(to)->db32_magic = (from)->db_magic; \
	(to)->db32_revision = (from)->db_revision; \
	(to)->db32_checksum = (from)->db_checksum; \
	(to)->db32_blknum = (from)->db_blknum; \
	(to)->db32_next = (uint32_t)(uintptr_t)(from)->db_next; \
	(to)->db32_nextblk = (from)->db_nextblk; \
	(to)->db32_timestamp = (from)->db_timestamp; \
	(to)->db32_recsum = (from)->db_recsum; \
	(to)->db32_firstentry = (uint32_t)(uintptr_t)(from)->db_firstentry;
729 * information about a replica of the data base
731 typedef struct mddb_ri
{
732 struct mddb_ri
*ri_next
;
739 mddb_mb_ic_t
*ri_mbip
;
742 mddb_did_ic_t
*ri_did_icp
;
743 ddi_devid_t ri_devid
;
744 ddi_devid_t ri_old_devid
;
745 char ri_minor_name
[MDDB_MINOR_NAME_MAX
];
746 char ri_devname
[MAXPATHLEN
];
749 typedef struct mddb_bf
{
750 struct mddb_bf
*bf_next
;
751 mddb_locator_t
*bf_locator
;
756 * Information for sets of databases (which include replicas)
/*
 * Record id layout, as established by the masks below: the low
 * MDDB_SETSHIFT bits (MDDB_RECIDMASK) hold the record number, and the
 * MD_SETMASK field shifted up by MDDB_SETSHIFT (MDDB_SETMASK) holds the
 * set number. MD_BITSSET and MD_SETMASK are defined outside this section.
 */
758 #define MDDB_BITSRECID 31
759 #define MDDB_SETSHIFT (MDDB_BITSRECID - MD_BITSSET)
760 #define MDDB_SETMASK (MD_SETMASK << MDDB_SETSHIFT)
761 #define MDDB_RECIDMASK ((1 << MDDB_SETSHIFT) - 1)
/* DBSET(id): extract the set-number field from record id 'id'. */
763 #define DBSET(id) (((id) & MDDB_SETMASK) >> MDDB_SETSHIFT)
/* DBID(id): extract the record-number field (set bits cleared) from 'id'. */
764 #define DBID(id) ((id) & MDDB_RECIDMASK)
/*
 * MAKERECID(s, i): build a record id from set number 's' and record
 * number 'i'; bits of either argument outside its field are masked off.
 */
765 #define MAKERECID(s, i) ((((s) << MDDB_SETSHIFT) & MDDB_SETMASK) | \
766 ((i) & MDDB_RECIDMASK))
768 #define MDDB_PARSE_LOCBLK 0x00000001
769 #define MDDB_PARSE_LOCNM 0x00000002
770 #define MDDB_PARSE_OPTRECS 0x00000004
771 #define MDDB_PARSE_MASK 0x0000000F
774 #define MDDB_BLOCK_PARSE 0x00000001 /* Block sending parse msgs */
775 #define MDDB_UNBLOCK_PARSE 0x00000002 /* Unblock sending parse msgs */
778 * We need to keep s_ident and s_inittime 32 bit. They are used in mddb_lb
780 typedef struct mddb_set
{
781 uint_t s_setno
; /* set number */
782 uint_t s_sideno
; /* side number */
783 identifier_t s_ident
; /* set identifier */
784 char *s_setname
; /* set name */
785 mddb_mb_ic_t
**s_mbiarray
; /* master blocks array */
786 mddb_db_t
*s_dbp
; /* directory block */
787 mddb_lb_t
*s_lbp
; /* locator block */
788 /* May be cast to mddb_mnlb_t */
789 /* if accessing sidenames in */
791 mddb_ln_t
*s_lnp
; /* locator names block */
792 /* May be cast to mddb_mnln_t */
793 /* if accessing sidenames in */
795 mddb_dtag_lst_t
*s_dtlp
; /* List of data tags found */
796 mddb_did_ic_t
*s_did_icp
; /* Device ID incore area */
797 mddb_ri_t
*s_rip
; /* replicas incore list */
798 int s_freeblkcnt
; /* visible for test code */
799 int s_totalblkcnt
; /* visible for test code */
800 int s_mn_parseflags
; /* mddb parse flags for MNset */
801 int s_mn_parseflags_sending
; /* parse flgs sent to slaves */
802 uchar_t
*s_freebitmap
; /* free blocks bitmap */
803 uint_t s_freebitmapsize
; /* size of bitmap */
804 struct timeval32 s_inittime
; /* timestamp set created */
805 mddb_recid_t s_zombie
; /* zombie record - createrec */
806 int s_staledeletes
; /* number of stale deleterec */
807 int s_optcmtcnt
; /* Following are opt. record */
808 int s_opthavelck
; /* bookkeeping records ... */
810 kcondvar_t s_optwantlck_cv
;
813 kcondvar_t s_opthungerr_cv
;
814 int s_opthavequeuinglck
;
815 int s_optwantqueuinglck
;
816 kcondvar_t s_optqueuing_cv
;
818 mddb_bf_t
*s_freebufhead
;
821 size_t s_databuffer_size
;
823 int s_singlelockgotten
;
824 int s_singlelockwanted
;
825 kcondvar_t s_single_thread_cv
;
832 extern uint_t
mddb_lb_did_convert(mddb_set_t
*,
834 extern void mddb_locatorblock2splitname(mddb_ln_t
*,
835 int, side_t
, md_splitname
*);
836 extern int mddb_configure(mddb_cfgcmd_t
,
837 struct mddb_config
*);
838 extern mddb_recid_t
mddb_getnextrec(mddb_recid_t
,
839 mddb_type_t
, uint_t
);
840 extern int mddb_getoptloc(mddb_optloc_t
*);
841 extern void *mddb_getrecaddr(mddb_recid_t
);
842 extern void *mddb_getrecaddr_resize(mddb_recid_t
, size_t,
844 extern int mddb_getrecprivate(mddb_recid_t
);
845 extern void mddb_setrecprivate(mddb_recid_t
, uint_t
);
846 extern mddb_de_ic_t
*mddb_getrecdep(mddb_recid_t
);
847 extern mddb_type_t
mddb_getrectype1(mddb_recid_t
);
848 extern int mddb_getrectype2(mddb_recid_t
);
849 extern int mddb_getrecsize(mddb_recid_t
);
850 extern int mddb_commitrec(mddb_recid_t
);
851 extern int mddb_commitrecs(mddb_recid_t
*);
852 extern int mddb_deleterec(mddb_recid_t
);
853 extern mddb_recstatus_t
mddb_getrecstatus(mddb_recid_t
);
854 extern mddb_recid_t
mddb_createrec(size_t usersize
,
855 mddb_type_t type
, uint_t type2
,
856 md_create_rec_option_t option
, set_t setno
);
857 extern void mddb_init(void);
858 extern void mddb_unload(void);
859 extern void mddb_unload_set(set_t setno
);
860 extern mddb_recid_t
mddb_makerecid(set_t setno
, mddb_recid_t id
);
861 extern set_t
mddb_getsetnum(mddb_recid_t id
);
862 extern char *mddb_getsetname(set_t setno
);
863 extern side_t
mddb_getsidenum(set_t setno
);
864 extern int mddb_ownset(set_t setno
);
865 extern int getmed_ioctl(mddb_med_parm_t
*medpp
, int mode
);
866 extern int setmed_ioctl(mddb_med_parm_t
*medpp
, int mode
);
867 extern int updmed_ioctl(mddb_med_upd_parm_t
*medpp
,
869 extern int take_set(mddb_config_t
*cp
, int mode
);
870 extern int release_set(mddb_config_t
*cp
, int mode
);
871 extern int gettag_ioctl(mddb_dtag_get_parm_t
*dtgpp
,
873 extern int usetag_ioctl(mddb_dtag_use_parm_t
*dtupp
,
875 extern int accept_ioctl(mddb_accept_parm_t
*medpp
,
877 extern int md_update_locator_namespace(set_t setno
,
878 side_t side
, char *dname
, char *pname
,
880 extern int mddb_validate_lb(set_t setno
, int *rmaxsz
);
881 extern int mddb_getinvlb_devid(set_t setno
, int count
,
882 int size
, char **ctdptr
);
883 extern int md_update_minor(set_t
, side_t
, mdkey_t
);
884 extern int md_update_nm_rr_did_ioctl(mddb_config_t
*cp
);
885 extern int md_update_top_device_minor(set_t
, side_t
,
888 extern void mddb_check(void);
/*
 * Stub macros that replace mddb entry points with constant expressions
 * (mddb_getrecaddr yields mddb_fakeit; mddb_createrec allocates with
 * kmem_zalloc and stores the result in mddb_fakeit).
 * NOTE(review): the preprocessor conditional that selects these stubs is
 * not visible in this section -- confirm when they are in effect.
 * NOTE(review): the first stub is spelled md_lb_did_convert, while the
 * prototype declared in this header is mddb_lb_did_convert; and the
 * mddb_createrec stub takes 3 arguments while its prototype takes 5.
 * Confirm whether either mismatch is intentional.
 */
896 #define md_lb_did_convert(a, b, c) (0)
897 #define mddb_configure(a, b) (0)
898 #define mddb_getnextrec(a, b, c) ((mddb_recid_t)0)
899 #define mddb_getrecaddr(a) (mddb_fakeit)
900 #define mddb_getrecprivate(a) (0)
901 #define mddb_setrecprivate(a, b) (0)
902 #define mddb_getrectype1(a) (0)
903 #define mddb_getrectype2(a) (0)
904 #define mddb_getrecsize(a) (0)
905 #define mddb_commitrec(a) (0)
906 #define mddb_commitrecs(a) (0)
907 #define mddb_deleterec(a) (0)
908 #define mddb_getrecstatus(a) (MDDB_OK)
909 #define mddb_createrec(s, a, b) (0xffff & (int)(mddb_fakeit = \
910 (caddr_t)kmem_zalloc(s, KM_SLEEP)))
911 #define mddb_unload() (0)
915 #define MDDB_NOSLEEP 1
916 #define MDDB_SLEEPOK 0
918 #define MDDB_NOOLDOK 0x1
919 #define MDDB_MUSTEXIST 0x2
920 #define MDDB_NOINIT 0x4
921 #define MDDB_MULTINODE 0x8
922 #define MDDB_MN_STALE 0x10 /* MN set is stale */
924 /* Flags passed to selectreplicas - not a bit mask */
925 #define MDDB_SCANALL 1
926 #define MDDB_RETRYSCAN 0
927 #define MDDB_SCANALLSYNC 2 /* During reconfig, sync up incore */
928 /* and ondisk mddb by writing incore */
929 /* values to disk. Don't write */
930 /* change log records. */
932 /* Flags passed to writestart and writecopy */
933 #define MDDB_WRITECOPY_ALL 1 /* Write all incore mddb to disk */
934 #define MDDB_WRITECOPY_SYNC 2 /* Write incore mddb to disk except */
935 /* - change log records */
936 /* - optimized resync records */
940 #define MDDB_NOPROBE 0
944 * MN diskset definitions used to determine if a slave can write
945 * directly to the mddb. ONLY_MASTER only allows the master node
946 * to write to the mddb. ANY_NODE allows any node to write
949 #define MDDB_WR_ONLY_MASTER 0
950 #define MDDB_WR_ANY_NODE 1
952 #define MDDB_L_LOCKED 0x0001 /* this record is locked */
953 #define MDDB_L_WANTED 0x0002
959 #endif /* _SYS_MD_MDDB_H */