4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
27 #pragma ident "%Z%%M% %I% %E% SMI"
29 #include <sys/param.h>
30 #include <sys/systm.h>
31 #include <sys/types.h>
32 #include <sys/debug.h>
33 #include <sys/errno.h>
34 #include <sys/sysmacros.h>
35 #include <sys/t_lock.h>
37 #include <sys/lvm/md_trans.h>
39 #include <sys/sunddi.h>
43 #include <sys/lvm/md_notify.h>
44 #include <sys/lvm/mdvar.h>
46 #include <sys/sysevent/eventdefs.h>
47 #include <sys/sysevent/svm.h>
49 extern unit_t md_nunits
;
50 extern set_t md_nsets
;
51 extern md_set_t md_set
[];
53 extern md_ops_t trans_md_ops
;
54 extern major_t md_major
;
59 static kmutex_t ml_lock
;
60 static ml_unit_t
*ul_list
; /* List of all log units */
62 static kmutex_t ut_mutex
; /* per log list of metatrans units */
63 static kmutex_t oc_mutex
; /* single threads opens/closes */
65 static void md_free_cirbuf(cirbuf_ic_t
*cb
);
67 #define IOWAIT(bp) sema_p(&bp->b_io)
68 #define IODONE(bp) sema_v(&bp->b_io)
73 mutex_init(&ut_mutex
, NULL
, MUTEX_DRIVER
, NULL
);
74 mutex_init(&oc_mutex
, NULL
, MUTEX_DRIVER
, NULL
);
75 mutex_init(&ml_lock
, NULL
, MUTEX_DRIVER
, NULL
);
81 mutex_destroy(&ut_mutex
);
82 mutex_destroy(&oc_mutex
);
83 mutex_destroy(&ml_lock
);
87 ldl_errorstate(ml_unit_t
*ul
)
93 else if (ldl_isherror(ul
))
98 cmn_err(CE_WARN
, "md: logging device: %s changed state to %s",
99 md_devname(mddb_getsetnum(ul
->un_recid
), ul
->un_dev
, NULL
, 0), str
);
104 * atomically commit the log unit struct and any underlying metadevice struct
107 logcommitdb(ml_unit_t
*ul
)
109 mddb_recid_t recids
[4];
111 TRANSSTATS(ts_logcommitdb
);
113 uniqtime32(&ul
->un_timestamp
);
116 * commit the log device and its child (if metadevice)
118 recids
[0] = ul
->un_recid
;
119 if (ul
->un_status
& LDL_METADEVICE
) {
120 struct mdc_unit
*c
= MD_UNIT(md_getminor(ul
->un_dev
));
121 recids
[1] = c
->un_record_id
;
126 mddb_commitrecs_wrapper(recids
);
130 md_alloc_wrbuf(cirbuf_ic_t
*cb
, size_t bufsize
)
136 * Clear previous allocation
141 bzero((caddr_t
)cb
, sizeof (*cb
));
142 rw_init(&cb
->cb_rwlock
.lock
, NULL
, RW_DRIVER
, NULL
);
144 rw_enter(&cb
->cb_rwlock
.lock
, RW_WRITER
);
147 * preallocate 3 bp's and put them on the free list.
149 for (i
= 0; i
< 3; ++i
) {
150 bp
= md_trans_zalloc(sizeof (buf_t
));
151 sema_init(&bp
->b_sem
, 1, NULL
, SEMA_DEFAULT
, NULL
);
152 sema_init(&bp
->b_io
, 0, NULL
, SEMA_DEFAULT
, NULL
);
154 bp
->b_forw
= cb
->cb_free
;
157 TRANSSTATS(ts_alloc_bp
);
160 cb
->cb_va
= md_trans_alloc(bufsize
);
164 * first bp claims entire write buffer
167 cb
->cb_free
= bp
->b_forw
;
172 bp
->b_un
.b_addr
= cb
->cb_va
;
173 bp
->b_bufsize
= cb
->cb_nb
;
175 rw_exit(&cb
->cb_rwlock
.lock
);
179 md_alloc_rdbuf(cirbuf_ic_t
*cb
, size_t bufsize
, size_t blksize
)
186 * Clear previous allocation
191 bzero((caddr_t
)cb
, sizeof (*cb
));
192 rw_init(&cb
->cb_rwlock
.lock
, NULL
, RW_DRIVER
, NULL
);
194 rw_enter(&cb
->cb_rwlock
.lock
, RW_WRITER
);
196 cb
->cb_va
= md_trans_alloc(bufsize
);
200 * preallocate N bufs that are hard-sized to blksize
201 * in other words, the read buffer pool is a linked list
202 * of statically sized bufs.
205 while ((nb
= bufsize
) != 0) {
208 bp
= md_trans_alloc(sizeof (buf_t
));
209 bzero((caddr_t
)bp
, sizeof (buf_t
));
210 sema_init(&bp
->b_sem
, 1, NULL
, SEMA_DEFAULT
, NULL
);
211 sema_init(&bp
->b_io
, 0, NULL
, SEMA_DEFAULT
, NULL
);
212 bp
->b_un
.b_addr
= va
;
216 bp
->b_forw
= cb
->cb_bp
->b_forw
;
217 bp
->b_back
= cb
->cb_bp
;
218 cb
->cb_bp
->b_forw
->b_back
= bp
;
219 cb
->cb_bp
->b_forw
= bp
;
221 bp
->b_forw
= bp
->b_back
= bp
;
224 TRANSSTATS(ts_alloc_bp
);
230 rw_exit(&cb
->cb_rwlock
.lock
);
235 md_free_cirbuf(cirbuf_ic_t
*cb
)
242 rw_enter(&cb
->cb_rwlock
.lock
, RW_WRITER
);
243 ASSERT(cb
->cb_dirty
== NULL
);
246 * free the active bufs
248 while ((bp
= cb
->cb_bp
) != NULL
) {
249 if (bp
== bp
->b_forw
)
252 cb
->cb_bp
= bp
->b_forw
;
253 bp
->b_back
->b_forw
= bp
->b_forw
;
254 bp
->b_forw
->b_back
= bp
->b_back
;
255 sema_destroy(&bp
->b_sem
);
256 sema_destroy(&bp
->b_io
);
257 md_trans_free(bp
, sizeof (buf_t
));
263 while ((bp
= cb
->cb_free
) != NULL
) {
264 cb
->cb_free
= bp
->b_forw
;
265 sema_destroy(&bp
->b_sem
);
266 sema_destroy(&bp
->b_io
);
267 md_trans_free(bp
, sizeof (buf_t
));
269 md_trans_free(cb
->cb_va
, cb
->cb_nb
);
272 rw_exit(&cb
->cb_rwlock
.lock
);
273 rw_destroy(&cb
->cb_rwlock
.lock
);
277 ldl_build_incore(ml_unit_t
*ul
, int snarfing
)
282 setno
= mddb_getsetnum(ul
->un_recid
);
284 ASSERT(ul
->un_head_lof
>= ul
->un_bol_lof
);
285 ASSERT(ul
->un_bol_lof
);
287 if (ul
->un_status
& LDL_BEING_RESET
) {
288 mddb_setrecprivate(ul
->un_recid
, MD_PRV_PENDCLEAN
);
293 * If snarfing the log device,
294 * then remake the device number
295 * else (we are creating the log device)
296 * set the driver name in the shared name space.
299 ul
->un_dev
= md_getdevnum(setno
, mddb_getsidenum(setno
),
300 ul
->un_key
, MD_NOTRUST_DEVT
);
304 * With the current device id implementation there is a possibility
305 * that we may have NODEV if the underlying device can't be resolved at
306 * snarf time. If this is the case we want to be consistent with
307 * the normal behavior and continue to allow the log to be put on the list.
308 * We delay resolving the dev_t so that it can be resolved by device id
309 * when the log device is opened.
311 if ((md_getmajor(ul
->un_dev
) == md_major
) &&
312 (md_dev_exists(ul
->un_dev
) == 0)) {
316 mutex_enter(&ml_lock
);
319 * initialize incore structs
320 * LDL_FIND_TAIL flag indicates that all I/O must wait until the
321 * tail has been found.
326 ul
->un_utlist
= NULL
;
327 ul
->un_logmap
= NULL
;
328 ul
->un_status
|= LDL_FIND_TAIL
;
329 ul
->un_status
&= ~LDL_SCAN_ACTIVE
;
330 ASSERT(ul
->un_devbsize
== DEV_BSIZE
);
332 mutex_init(&ul
->un_log_mutex
, NULL
, MUTEX_DRIVER
, NULL
);
335 * allocate some read and write buffers
337 bufsize
= md_ldl_bufsize(ul
);
338 ul
->un_rdbuf
.cb_nb
= 0;
339 md_alloc_rdbuf(&ul
->un_rdbuf
, bufsize
, MAPBLOCKSIZE
);
340 ul
->un_wrbuf
.cb_nb
= 0;
341 md_alloc_wrbuf(&ul
->un_wrbuf
, bufsize
);
344 if (ul
->un_error
& LDL_ANYERROR
) {
345 ul
->un_error
= LDL_HERROR
;
351 /* Put on the unit list */
352 ul
->un_next
= ul_list
;
356 mutex_exit(&ml_lock
);
361 ldl_findlog(mddb_recid_t recid
)
366 * Find a unit struct by database recid
368 mutex_enter(&ml_lock
);
369 for (ul
= ul_list
; ul
; ul
= ul
->un_next
)
370 if (ul
->un_recid
== recid
)
372 mutex_exit(&ml_lock
);
377 * ldl_utadd adds a metatrans device to the log's list of mt devices.
378 * WARNING: top_end_sync() scans this list W/O locking for performance!!!
381 ldl_utadd(mt_unit_t
*un
)
383 ml_unit_t
*ul
= un
->un_l_unit
;
388 mutex_enter(&ut_mutex
);
389 un
->un_next
= ul
->un_utlist
;
391 ASSERT((ul
->un_logmap
== NULL
) || (ul
->un_logmap
== un
->un_logmap
));
392 ul
->un_logmap
= un
->un_logmap
;
393 mutex_exit(&ut_mutex
);
397 * ldl_utdel removes a metatrans device from the log's list of mt devices.
398 * WARNING: top_end_sync() scans this list W/O locking for performance!!!
401 ldl_utdel(mt_unit_t
*un
)
403 ml_unit_t
*ul
= un
->un_l_unit
;
404 mt_unit_t
**utp
= &ul
->un_utlist
;
406 mutex_enter(&ut_mutex
);
407 for (utp
= &ul
->un_utlist
;
408 *utp
&& (*utp
!= un
);
409 utp
= &(*utp
)->un_next
);
412 un
->un_l_unit
= NULL
;
413 mutex_exit(&ut_mutex
);
417 ldl_create(mdkey_t key
, mt_unit_t
*un
)
425 setno
= MD_UN2SET(un
);
428 * Find a unit struct for this key and set
429 * If we found one then, we are done.
432 mutex_enter(&ml_lock
);
433 for (ul
= ul_list
; ul
; ul
= ul
->un_next
)
434 if ((ul
->un_key
== key
) &&
435 (mddb_getsetnum(ul
->un_recid
) == setno
))
437 mutex_exit(&ml_lock
);
440 return (ul
->un_recid
);
442 typ1
= (mddb_type_t
)md_getshared_key(setno
,
443 trans_md_ops
.md_driver
.md_drivername
);
444 recid
= mddb_createrec(ML_UNIT_ONDSZ
, typ1
, LOG_REC
,
445 MD_CRO_32BIT
| MD_CRO_TRANS_LOG
, setno
);
448 mddb_setrecprivate(recid
, MD_PRV_GOTIT
);
450 ul
= (ml_unit_t
*)mddb_getrecaddr_resize(recid
, sizeof (*ul
), 0);
452 ul
->un_recid
= recid
;
454 ul
->un_dev
= md_getdevnum(setno
, mddb_getsidenum(setno
), key
,
456 ul
->un_bol_lof
= (off32_t
)dbtob(un
->un_l_sblk
);
457 ul
->un_eol_lof
= ul
->un_bol_lof
+ (off32_t
)dbtob(un
->un_l_nblks
);
458 ul
->un_pwsblk
= un
->un_l_pwsblk
;
459 ul
->un_nblks
= un
->un_l_nblks
;
460 ul
->un_tblks
= un
->un_l_tblks
;
461 ul
->un_maxresv
= un
->un_l_maxresv
;
462 ul
->un_maxtransfer
= (uint_t
)dbtob(un
->un_l_maxtransfer
);
463 ul
->un_devbsize
= DEV_BSIZE
;
469 ul
->un_head_lof
= ul
->un_bol_lof
;
470 ul
->un_tail_lof
= ul
->un_bol_lof
;
471 ul
->un_head_ident
= tv
.tv_sec
;
472 ul
->un_tail_ident
= tv
.tv_sec
;
474 if (md_getmajor(ul
->un_dev
) == md_major
)
475 ul
->un_status
|= LDL_METADEVICE
;
477 md_set_parent(ul
->un_dev
, (int)MD_MULTI_PARENT
);
478 (void) ldl_build_incore(ul
, 0);
484 ldl_open_dev(mt_unit_t
*un
, ml_unit_t
*ul
)
488 minor_t mnum
= MD_SID(un
);
489 set_t setno
= MD_MIN2SET(MD_SID(un
));
490 side_t side
= mddb_getsidenum(setno
);
492 mutex_enter(&oc_mutex
);
494 if (ul
->un_opencnt
) {
496 mutex_exit(&oc_mutex
);
502 * Do the open by device id if it is regular device
504 if ((md_getmajor(tmpdev
) != md_major
) &&
505 md_devid_found(setno
, side
, ul
->un_key
) == 1) {
506 tmpdev
= md_resolve_bydevid(mnum
, tmpdev
, ul
->un_key
);
508 err
= md_layered_open(mnum
, &tmpdev
, MD_OFLG_NULL
);
514 mutex_exit(&oc_mutex
);
519 ldl_close_dev(ml_unit_t
*ul
)
522 mutex_enter(&oc_mutex
);
526 if (ul
->un_opencnt
) {
527 mutex_exit(&oc_mutex
);
531 /* Last reference to the log, close it */
532 md_layered_close(ul
->un_dev
, MD_OFLG_NULL
);
534 mutex_exit(&oc_mutex
);
542 ldl_isherror(ml_unit_t
*ul
)
544 return ((ul
!= NULL
) && (ul
->un_error
& LDL_HERROR
));
548 ldl_iserror(ml_unit_t
*ul
)
550 return ((ul
!= NULL
) && (ul
->un_error
& LDL_ERROR
));
554 md_ldl_bufsize(ml_unit_t
*ul
)
559 * initial guess is the maxtransfer value for this log device
560 * reduce by number of logs
561 * increase for sharing
562 * increase if too small
563 * decrease if too large
565 bufsize
= ul
->un_maxtransfer
;
569 bufsize
*= ul
->un_transcnt
;
570 bufsize
= dbtob(btod(bufsize
));
571 if (bufsize
< LDL_MINBUFSIZE
)
572 bufsize
= LDL_MINBUFSIZE
;
573 if (bufsize
> maxphys
)
575 if (bufsize
> ul
->un_maxtransfer
)
576 bufsize
= ul
->un_maxtransfer
;
581 * if necessary, open all underlying devices for ul and start threads
582 * called at snarf, metainit, and open
585 ldl_open_underlying(mt_unit_t
*un
)
587 ml_unit_t
*ul
= un
->un_l_unit
;
592 * first, handle the case of detached logs
595 err
= trans_open_all_devs(un
);
597 un
->un_flags
&= ~TRANS_NEED_OPEN
;
598 un
->un_flags
|= TRANS_OPENED
;
604 * remove log unit struct from global linked list
607 ldl_unlist(ml_unit_t
*ul
)
614 mutex_enter(&ml_lock
);
615 for (ulp
= &ul_list
; *ulp
&& (*ulp
!= ul
); ulp
= &(*ulp
)->un_next
);
620 mutex_exit(&ml_lock
);
624 * get rid of a log unit from the database
627 ldl_cleanup(ml_unit_t
*ul
)
631 /* Save the log key */
632 sv
.setno
= mddb_getsetnum(ul
->un_recid
);
635 mddb_deleterec_wrapper(ul
->un_recid
);
636 md_rem_names(&sv
, 1);
640 ldl_delete(ml_unit_t
*ul
, int removing
)
651 md_free_cirbuf(&ul
->un_rdbuf
);
652 md_free_cirbuf(&ul
->un_wrbuf
);
654 mutex_destroy(&ul
->un_log_mutex
);
657 md_reset_parent(ul
->un_dev
);
658 ul
->un_status
|= LDL_BEING_RESET
;
665 * detach log from trans device
666 * caller ensures that trans device is idle and will remain idle
670 ldl_reset(mt_unit_t
*un
, int removing
, int force
)
672 ml_unit_t
*ul
= un
->un_l_unit
;
677 if (un
->un_flags
& TRANS_DETACHING
) {
678 un
->un_flags
&= ~TRANS_DETACHING
;
679 un
->un_flags
|= TRANS_DETACHED
;
684 * remove this metatrans device from the log's list of mt devices
694 ldl_delete(ul
, removing
);