7712 mandoc -Tlint does always exit with error code 0
[unleashed.git] / usr / src / uts / common / io / lvm / trans / trans_log.c
blob96d8a7cb434ddb35364e9de9755bce3597fc9815
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
7 * with the License.
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
20 * CDDL HEADER END
23 * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
27 #pragma ident "%Z%%M% %I% %E% SMI"
29 #include <sys/param.h>
30 #include <sys/systm.h>
31 #include <sys/types.h>
32 #include <sys/debug.h>
33 #include <sys/errno.h>
34 #include <sys/sysmacros.h>
35 #include <sys/t_lock.h>
36 #include <sys/kmem.h>
37 #include <sys/lvm/md_trans.h>
38 #include <sys/ddi.h>
39 #include <sys/sunddi.h>
40 #include <sys/file.h>
41 #include <sys/proc.h>
42 #include <sys/disp.h>
43 #include <sys/lvm/md_notify.h>
44 #include <sys/lvm/mdvar.h>
46 #include <sys/sysevent/eventdefs.h>
47 #include <sys/sysevent/svm.h>
49 extern unit_t md_nunits;
50 extern set_t md_nsets;
51 extern md_set_t md_set[];
53 extern md_ops_t trans_md_ops;
54 extern major_t md_major;
59 static kmutex_t ml_lock;
60 static ml_unit_t *ul_list; /* List of all log units */
61 static int md_nlogs;
62 static kmutex_t ut_mutex; /* per log list of metatrans units */
63 static kmutex_t oc_mutex; /* single threads opens/closes */
65 static void md_free_cirbuf(cirbuf_ic_t *cb);
67 #define IOWAIT(bp) sema_p(&bp->b_io)
68 #define IODONE(bp) sema_v(&bp->b_io)
70 void
71 _init_ldl(void)
73 mutex_init(&ut_mutex, NULL, MUTEX_DRIVER, NULL);
74 mutex_init(&oc_mutex, NULL, MUTEX_DRIVER, NULL);
75 mutex_init(&ml_lock, NULL, MUTEX_DRIVER, NULL);
78 void
79 _fini_ldl(void)
81 mutex_destroy(&ut_mutex);
82 mutex_destroy(&oc_mutex);
83 mutex_destroy(&ml_lock);
86 static void
87 ldl_errorstate(ml_unit_t *ul)
89 char *str;
91 if (ldl_iserror(ul))
92 str = "Error";
93 else if (ldl_isherror(ul))
94 str = "Hard Error";
95 else
96 str = "Okay";
98 cmn_err(CE_WARN, "md: logging device: %s changed state to %s",
99 md_devname(mddb_getsetnum(ul->un_recid), ul->un_dev, NULL, 0), str);
104 * atomically commit the log unit struct and any underlying metadevice struct
106 static void
107 logcommitdb(ml_unit_t *ul)
109 mddb_recid_t recids[4];
111 TRANSSTATS(ts_logcommitdb);
113 uniqtime32(&ul->un_timestamp);
116 * commit the log device and its child (if metadevice)
118 recids[0] = ul->un_recid;
119 if (ul->un_status & LDL_METADEVICE) {
120 struct mdc_unit *c = MD_UNIT(md_getminor(ul->un_dev));
121 recids[1] = c->un_record_id;
122 recids[2] = 0;
123 } else
124 recids[1] = 0;
126 mddb_commitrecs_wrapper(recids);
129 static void
130 md_alloc_wrbuf(cirbuf_ic_t *cb, size_t bufsize)
132 int i;
133 buf_t *bp;
136 * Clear previous allocation
138 if (cb->cb_nb)
139 md_free_cirbuf(cb);
141 bzero((caddr_t)cb, sizeof (*cb));
142 rw_init(&cb->cb_rwlock.lock, NULL, RW_DRIVER, NULL);
144 rw_enter(&cb->cb_rwlock.lock, RW_WRITER);
147 * preallocate 3 bp's and put them on the free list.
149 for (i = 0; i < 3; ++i) {
150 bp = md_trans_zalloc(sizeof (buf_t));
151 sema_init(&bp->b_sem, 1, NULL, SEMA_DEFAULT, NULL);
152 sema_init(&bp->b_io, 0, NULL, SEMA_DEFAULT, NULL);
153 bp->b_offset = -1;
154 bp->b_forw = cb->cb_free;
155 cb->cb_free = bp;
157 TRANSSTATS(ts_alloc_bp);
160 cb->cb_va = md_trans_alloc(bufsize);
161 cb->cb_nb = bufsize;
164 * first bp claims entire write buffer
166 bp = cb->cb_free;
167 cb->cb_free = bp->b_forw;
169 bp->b_forw = bp;
170 bp->b_back = bp;
171 cb->cb_bp = bp;
172 bp->b_un.b_addr = cb->cb_va;
173 bp->b_bufsize = cb->cb_nb;
175 rw_exit(&cb->cb_rwlock.lock);
178 static void
179 md_alloc_rdbuf(cirbuf_ic_t *cb, size_t bufsize, size_t blksize)
181 caddr_t va;
182 size_t nb;
183 buf_t *bp;
186 * Clear previous allocation
188 if (cb->cb_nb)
189 md_free_cirbuf(cb);
191 bzero((caddr_t)cb, sizeof (*cb));
192 rw_init(&cb->cb_rwlock.lock, NULL, RW_DRIVER, NULL);
194 rw_enter(&cb->cb_rwlock.lock, RW_WRITER);
196 cb->cb_va = md_trans_alloc(bufsize);
197 cb->cb_nb = bufsize;
200 * preallocate N bufs that are hard-sized to blksize
201 * in other words, the read buffer pool is a linked list
202 * of statically sized bufs.
204 va = cb->cb_va;
205 while ((nb = bufsize) != 0) {
206 if (nb > blksize)
207 nb = blksize;
208 bp = md_trans_alloc(sizeof (buf_t));
209 bzero((caddr_t)bp, sizeof (buf_t));
210 sema_init(&bp->b_sem, 1, NULL, SEMA_DEFAULT, NULL);
211 sema_init(&bp->b_io, 0, NULL, SEMA_DEFAULT, NULL);
212 bp->b_un.b_addr = va;
213 bp->b_bufsize = nb;
214 bp->b_offset = -1;
215 if (cb->cb_bp) {
216 bp->b_forw = cb->cb_bp->b_forw;
217 bp->b_back = cb->cb_bp;
218 cb->cb_bp->b_forw->b_back = bp;
219 cb->cb_bp->b_forw = bp;
220 } else
221 bp->b_forw = bp->b_back = bp;
222 cb->cb_bp = bp;
224 TRANSSTATS(ts_alloc_bp);
226 bufsize -= nb;
227 va += nb;
230 rw_exit(&cb->cb_rwlock.lock);
234 static void
235 md_free_cirbuf(cirbuf_ic_t *cb)
237 buf_t *bp;
239 if (cb->cb_nb == 0)
240 return;
242 rw_enter(&cb->cb_rwlock.lock, RW_WRITER);
243 ASSERT(cb->cb_dirty == NULL);
246 * free the active bufs
248 while ((bp = cb->cb_bp) != NULL) {
249 if (bp == bp->b_forw)
250 cb->cb_bp = NULL;
251 else
252 cb->cb_bp = bp->b_forw;
253 bp->b_back->b_forw = bp->b_forw;
254 bp->b_forw->b_back = bp->b_back;
255 sema_destroy(&bp->b_sem);
256 sema_destroy(&bp->b_io);
257 md_trans_free(bp, sizeof (buf_t));
261 * free the free bufs
263 while ((bp = cb->cb_free) != NULL) {
264 cb->cb_free = bp->b_forw;
265 sema_destroy(&bp->b_sem);
266 sema_destroy(&bp->b_io);
267 md_trans_free(bp, sizeof (buf_t));
269 md_trans_free(cb->cb_va, cb->cb_nb);
270 cb->cb_va = NULL;
271 cb->cb_nb = 0;
272 rw_exit(&cb->cb_rwlock.lock);
273 rw_destroy(&cb->cb_rwlock.lock);
277 ldl_build_incore(ml_unit_t *ul, int snarfing)
279 size_t bufsize;
280 set_t setno;
282 setno = mddb_getsetnum(ul->un_recid);
284 ASSERT(ul->un_head_lof >= ul->un_bol_lof);
285 ASSERT(ul->un_bol_lof);
287 if (ul->un_status & LDL_BEING_RESET) {
288 mddb_setrecprivate(ul->un_recid, MD_PRV_PENDCLEAN);
289 return (1);
293 * If snarfing the log device,
294 * then remake the device number
295 * else (we are creating the log device)
296 * set the driver name in the shared name space.
298 if (snarfing) {
299 ul->un_dev = md_getdevnum(setno, mddb_getsidenum(setno),
300 ul->un_key, MD_NOTRUST_DEVT);
304 * With the current device id implementation there is possibility
305 * that we may have NODEV if the underlying can't be resolved at
306 * snarf time. If this is the case we want to be consistent with
307 * the normal behavior and continue to allow log to be put on the list.
308 * We delay the resolve of the dev_t so we can resolve at the open
309 * time of the log device by device id
311 if ((md_getmajor(ul->un_dev) == md_major) &&
312 (md_dev_exists(ul->un_dev) == 0)) {
313 return (1);
316 mutex_enter(&ml_lock);
319 * initialize incore structs
320 * LDL_FIND_TAIL flag indicates that all I/O must wait until the
321 * tail has been found.
323 ul->un_opencnt = 0;
324 ul->un_transcnt = 0;
325 ul->un_resv = 0;
326 ul->un_utlist = NULL;
327 ul->un_logmap = NULL;
328 ul->un_status |= LDL_FIND_TAIL;
329 ul->un_status &= ~LDL_SCAN_ACTIVE;
330 ASSERT(ul->un_devbsize == DEV_BSIZE);
332 mutex_init(&ul->un_log_mutex, NULL, MUTEX_DRIVER, NULL);
335 * allocate some read and write buffers
337 bufsize = md_ldl_bufsize(ul);
338 ul->un_rdbuf.cb_nb = 0;
339 md_alloc_rdbuf(&ul->un_rdbuf, bufsize, MAPBLOCKSIZE);
340 ul->un_wrbuf.cb_nb = 0;
341 md_alloc_wrbuf(&ul->un_wrbuf, bufsize);
343 if (snarfing) {
344 if (ul->un_error & LDL_ANYERROR) {
345 ul->un_error = LDL_HERROR;
346 ldl_errorstate(ul);
347 } else
348 ul->un_error = 0;
351 /* Put on the unit list */
352 ul->un_next = ul_list;
353 ul_list = ul;
354 md_nlogs++;
356 mutex_exit(&ml_lock);
357 return (0);
360 ml_unit_t *
361 ldl_findlog(mddb_recid_t recid)
363 ml_unit_t *ul;
366 * Find a unit struct by database recid
368 mutex_enter(&ml_lock);
369 for (ul = ul_list; ul; ul = ul->un_next)
370 if (ul->un_recid == recid)
371 break;
372 mutex_exit(&ml_lock);
373 return (ul);
377 * ldl_utadd adds a metatrans device to the log's list of mt devices.
378 * WARNING: top_end_sync() scans this list W/O locking for performance!!!
380 void
381 ldl_utadd(mt_unit_t *un)
383 ml_unit_t *ul = un->un_l_unit;
385 if (ul == NULL)
386 return;
388 mutex_enter(&ut_mutex);
389 un->un_next = ul->un_utlist;
390 ul->un_utlist = un;
391 ASSERT((ul->un_logmap == NULL) || (ul->un_logmap == un->un_logmap));
392 ul->un_logmap = un->un_logmap;
393 mutex_exit(&ut_mutex);
397 * ldl_utdel removes a metatrans device to the log's list of mt devices.
398 * WARNING: top_end_sync() scans this list W/O locking for performance!!!
400 static void
401 ldl_utdel(mt_unit_t *un)
403 ml_unit_t *ul = un->un_l_unit;
404 mt_unit_t **utp = &ul->un_utlist;
406 mutex_enter(&ut_mutex);
407 for (utp = &ul->un_utlist;
408 *utp && (*utp != un);
409 utp = &(*utp)->un_next);
410 if (*utp)
411 *utp = un->un_next;
412 un->un_l_unit = NULL;
413 mutex_exit(&ut_mutex);
416 mddb_recid_t
417 ldl_create(mdkey_t key, mt_unit_t *un)
419 ml_unit_t *ul;
420 mddb_recid_t recid;
421 struct timeval32 tv;
422 mddb_type_t typ1;
423 set_t setno;
425 setno = MD_UN2SET(un);
428 * Find a unit struct for this key and set
429 * If we found one then, we are done.
430 * Else create one.
432 mutex_enter(&ml_lock);
433 for (ul = ul_list; ul; ul = ul->un_next)
434 if ((ul->un_key == key) &&
435 (mddb_getsetnum(ul->un_recid) == setno))
436 break;
437 mutex_exit(&ml_lock);
439 if (ul)
440 return (ul->un_recid);
442 typ1 = (mddb_type_t)md_getshared_key(setno,
443 trans_md_ops.md_driver.md_drivername);
444 recid = mddb_createrec(ML_UNIT_ONDSZ, typ1, LOG_REC,
445 MD_CRO_32BIT | MD_CRO_TRANS_LOG, setno);
446 if (recid < 0)
447 return (recid);
448 mddb_setrecprivate(recid, MD_PRV_GOTIT);
450 ul = (ml_unit_t *)mddb_getrecaddr_resize(recid, sizeof (*ul), 0);
452 ul->un_recid = recid;
453 ul->un_key = key;
454 ul->un_dev = md_getdevnum(setno, mddb_getsidenum(setno), key,
455 MD_NOTRUST_DEVT);
456 ul->un_bol_lof = (off32_t)dbtob(un->un_l_sblk);
457 ul->un_eol_lof = ul->un_bol_lof + (off32_t)dbtob(un->un_l_nblks);
458 ul->un_pwsblk = un->un_l_pwsblk;
459 ul->un_nblks = un->un_l_nblks;
460 ul->un_tblks = un->un_l_tblks;
461 ul->un_maxresv = un->un_l_maxresv;
462 ul->un_maxtransfer = (uint_t)dbtob(un->un_l_maxtransfer);
463 ul->un_devbsize = DEV_BSIZE;
466 * empty log
468 uniqtime32(&tv);
469 ul->un_head_lof = ul->un_bol_lof;
470 ul->un_tail_lof = ul->un_bol_lof;
471 ul->un_head_ident = tv.tv_sec;
472 ul->un_tail_ident = tv.tv_sec;
474 if (md_getmajor(ul->un_dev) == md_major)
475 ul->un_status |= LDL_METADEVICE;
477 md_set_parent(ul->un_dev, (int)MD_MULTI_PARENT);
478 (void) ldl_build_incore(ul, 0);
479 logcommitdb(ul);
480 return (recid);
484 ldl_open_dev(mt_unit_t *un, ml_unit_t *ul)
486 int err = 0;
487 md_dev64_t tmpdev;
488 minor_t mnum = MD_SID(un);
489 set_t setno = MD_MIN2SET(MD_SID(un));
490 side_t side = mddb_getsidenum(setno);
492 mutex_enter(&oc_mutex);
494 if (ul->un_opencnt) {
495 ul->un_opencnt++;
496 mutex_exit(&oc_mutex);
497 return (0);
500 tmpdev = ul->un_dev;
502 * Do the open by device id if it is regular device
504 if ((md_getmajor(tmpdev) != md_major) &&
505 md_devid_found(setno, side, ul->un_key) == 1) {
506 tmpdev = md_resolve_bydevid(mnum, tmpdev, ul->un_key);
508 err = md_layered_open(mnum, &tmpdev, MD_OFLG_NULL);
509 ul->un_dev = tmpdev;
511 if (err == 0)
512 ul->un_opencnt++;
514 mutex_exit(&oc_mutex);
515 return (err);
518 void
519 ldl_close_dev(ml_unit_t *ul)
522 mutex_enter(&oc_mutex);
524 ul->un_opencnt--;
526 if (ul->un_opencnt) {
527 mutex_exit(&oc_mutex);
528 return;
531 /* Last reference to the log, close it */
532 md_layered_close(ul->un_dev, MD_OFLG_NULL);
534 mutex_exit(&oc_mutex);
/*
 * LOGSCAN STUFF
 */
542 ldl_isherror(ml_unit_t *ul)
544 return ((ul != NULL) && (ul->un_error & LDL_HERROR));
548 ldl_iserror(ml_unit_t *ul)
550 return ((ul != NULL) && (ul->un_error & LDL_ERROR));
553 size_t
554 md_ldl_bufsize(ml_unit_t *ul)
556 size_t bufsize;
559 * initial guess is the maxtransfer value for this log device
560 * reduce by number of logs
561 * increase for sharing
562 * increase if too small
563 * decrease if too large
565 bufsize = ul->un_maxtransfer;
566 if (md_nlogs)
567 bufsize /= md_nlogs;
568 if (ul->un_transcnt)
569 bufsize *= ul->un_transcnt;
570 bufsize = dbtob(btod(bufsize));
571 if (bufsize < LDL_MINBUFSIZE)
572 bufsize = LDL_MINBUFSIZE;
573 if (bufsize > maxphys)
574 bufsize = maxphys;
575 if (bufsize > ul->un_maxtransfer)
576 bufsize = ul->un_maxtransfer;
577 return (bufsize);
581 * if necessary; open all underlying devices for ul and start threads
582 * called at snarf, metainit, and open
584 void
585 ldl_open_underlying(mt_unit_t *un)
587 ml_unit_t *ul = un->un_l_unit;
588 int err = 0;
592 * first, handle the case of detached logs
594 if (ul == NULL) {
595 err = trans_open_all_devs(un);
596 if (err == 0) {
597 un->un_flags &= ~TRANS_NEED_OPEN;
598 un->un_flags |= TRANS_OPENED;
604 * remove log unit struct from global linked list
606 static void
607 ldl_unlist(ml_unit_t *ul)
609 ml_unit_t **ulp;
612 * remove from list
614 mutex_enter(&ml_lock);
615 for (ulp = &ul_list; *ulp && (*ulp != ul); ulp = &(*ulp)->un_next);
616 if (*ulp) {
617 *ulp = ul->un_next;
618 --md_nlogs;
620 mutex_exit(&ml_lock);
624 * get rid of a log unit from the database
626 void
627 ldl_cleanup(ml_unit_t *ul)
629 sv_dev_t sv;
631 /* Save the log key */
632 sv.setno = mddb_getsetnum(ul->un_recid);
633 sv.key = ul->un_key;
635 mddb_deleterec_wrapper(ul->un_recid);
636 md_rem_names(&sv, 1);
639 static void
640 ldl_delete(ml_unit_t *ul, int removing)
644 * remove from list
646 ldl_unlist(ul);
649 * free up resources
651 md_free_cirbuf(&ul->un_rdbuf);
652 md_free_cirbuf(&ul->un_wrbuf);
654 mutex_destroy(&ul->un_log_mutex);
656 if (removing) {
657 md_reset_parent(ul->un_dev);
658 ul->un_status |= LDL_BEING_RESET;
659 logcommitdb(ul);
660 ldl_cleanup(ul);
665 * detach log from trans device
666 * caller insures that trans device is idle and will remain idle
668 /* ARGSUSED */
670 ldl_reset(mt_unit_t *un, int removing, int force)
672 ml_unit_t *ul = un->un_l_unit;
674 if (ul == NULL)
675 return (0);
677 if (un->un_flags & TRANS_DETACHING) {
678 un->un_flags &= ~TRANS_DETACHING;
679 un->un_flags |= TRANS_DETACHED;
680 trans_commit(un, 0);
684 * remove this metatrans device from the log's list of mt devices
686 ldl_utdel(un);
689 * busy; do nothing
691 if (ul->un_utlist)
692 return (0);
694 ldl_delete(ul, removing);
696 return (0);