uts: make emu10k non-verbose
[unleashed.git] / kernel / os / devcache.c
blob67a2b35c71f4d8b7441c2bd472801fe4716ba8bd
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
26 #include <sys/note.h>
27 #include <sys/t_lock.h>
28 #include <sys/cmn_err.h>
29 #include <sys/instance.h>
30 #include <sys/conf.h>
31 #include <sys/stat.h>
32 #include <sys/ddi.h>
33 #include <sys/hwconf.h>
34 #include <sys/sunddi.h>
35 #include <sys/sunndi.h>
36 #include <sys/ddi_impldefs.h>
37 #include <sys/ndi_impldefs.h>
38 #include <sys/modctl.h>
39 #include <sys/dacf.h>
40 #include <sys/promif.h>
41 #include <sys/cpuvar.h>
42 #include <sys/pathname.h>
43 #include <sys/kobj.h>
44 #include <sys/devcache.h>
45 #include <sys/devcache_impl.h>
46 #include <sys/sysmacros.h>
47 #include <sys/varargs.h>
48 #include <sys/callb.h>
51 * This facility provides interfaces to clients to register,
52 * read and update cache data in persisted backing store files,
53 * usually in /etc/devices. The data persisted through this
54 * mechanism should be stateless data, functioning in the sense
55 * of a cache. Writes are performed by a background daemon
56 * thread, permitting a client to schedule an update without
57 * blocking, then continue updating the data state in
58 * parallel. The data is only locked by the daemon thread
59 * to pack the data in preparation for the write.
61 * Data persisted through this mechanism should be capable
62 * of being regenerated through normal system operation,
63 * for example attaching all disk devices would cause all
64 * devids to be registered for those devices. By caching
65 * a devid-device tuple, the system can operate in a
66 * more optimal way, directly attaching the device mapped
67 * to a devid, rather than burdensomely driving attach of
68 * the entire device tree to discover a single device.
70 * Note that a client should only need to include
71 * <sys/devcache.h> for the supported interfaces.
73 * The data per client is entirely within the control of
74 * the client. When reading, data unpacked from the backing
75 * store should be inserted in the list. The pointer to
76 * the list can be retrieved via nvf_list(). When writing,
77 * the data on the list is to be packed and returned to the
78 * nvpdaemon as an nvlist.
80 * Obvious restrictions are imposed by the limits of the
81 * nvlist format. The data cannot be read or written
82 * piecemeal, and large amounts of data aren't recommended.
83 * However, nvlists do allow that data be named and typed
84 * and can be size-of-int invariant, and the cached data
85 * can be versioned conveniently.
87 * The registration involves two steps: a handle is
88 * allocated by calling the registration function.
89 * This sets up the data referenced by the handle and
90 * initializes the lock. Following registration, the
91 * client must initialize the data list. The list
92 * interfaces require that the list element with offset
93 * to the node link be provided. The format of the
94 * list element is under the control of the client.
96 * Locking: the address of the data list r/w lock provided
97 * can be accessed with nvf_lock(). The lock must be held
98 * as reader when traversing the list or checking state,
99 * such as nvf_is_dirty(). The lock must be held as
100 * writer when updating the list or marking it dirty.
101 * The lock must not be held when waking the daemon.
103 * The data r/w lock is held as writer when the pack,
104 * unpack and free list handlers are called. The
105 * lock should not be dropped and must be still held
106 * upon return. The client should also hold the lock
107 * as reader when checking if the list is dirty, and
108 * as writer when marking the list dirty or initiating
109 * a read.
111 * The asynchronous nature of updates allows for the
112 * possibility that the data may continue to be updated
113 * once the daemon has been notified that an update is
114 * desired. The data only needs to be locked against
115 * updates when packing the data into the form to be
116 * written. When the write of the packed data has
117 * completed, the daemon will automatically reschedule
118 * an update if the data was marked dirty after the
119 * point at which it was packed. Before beginning an
120 * update, the daemon attempts to lock the data as
121 * writer; if the writer lock is already held, it
122 * backs off and retries later. The model is to give
123 * priority to the kernel processes generating the
124 * data, and that the nature of the data is that
125 * it does not change often, can be re-generated when
126 * needed, so updates should not happen often and
127 * can be delayed until the data stops changing.
128 * The client may update the list or mark it dirty
129 * any time it is able to acquire the lock as
130 * writer first.
132 * A failed write will be retried after some delay,
133 * in the hope that the cause of the error will be
134 * transient, for example a filesystem with no space
135 * available. An update on a read-only filesystem
136 * is failed silently and not retried; this would be
137 * the case when booted off install media.
139 * There is no unregister mechanism as of yet, as it
140 * hasn't been needed so far.
/*
 * Global list of files registered and updated by the nvpflush
 * daemon, protected by the nvf_cache_mutex. While an
 * update is taking place, a file is temporarily moved to
 * the dirty list to avoid locking the primary list for
 * the duration of the update.
 */
list_t		nvf_cache_files;	/* registered files, not being flushed */
list_t		nvf_dirty_files;	/* parked here while a flush is in progress */
kmutex_t	nvf_cache_mutex;	/* protects membership of both lists */
/*
 * Allow some delay from an update of the data before flushing
 * to permit simultaneous updates of multiple changes.
 * Changes in the data are expected to be bursty, ie
 * reconfig or hot-plug of a new adapter.
 *
 * kfio_report_error (default 0)
 *	Set to 1 to enable some error messages related to low-level
 *	kernel file i/o operations.
 *
 * nvpflush_delay (default 10)
 *	The number of seconds after data is marked dirty before the
 *	flush daemon is triggered to flush the data.  A longer period
 *	of time permits more data updates per write.  Note that
 *	every update resets the timer so no repository write will
 *	occur while data is being updated continuously.
 *
 * nvpdaemon_idle_time (default 60)
 *	The number of seconds the daemon will sleep idle before exiting.
 */
#define	NVPFLUSH_DELAY		10
#define	NVPDAEMON_IDLE_TIME	60

#define	TICKS_PER_SECOND	(drv_usectohz(1000000))

/*
 * Tunables
 */
int kfio_report_error = 0;		/* kernel file i/o operations */
int kfio_disable_read = 0;		/* disable all reads */
int kfio_disable_write = 0;		/* disable all writes */

int nvpflush_delay = NVPFLUSH_DELAY;
int nvpdaemon_idle_time = NVPDAEMON_IDLE_TIME;

/* flush-timer state, protected by nvpflush_lock */
static timeout_id_t nvpflush_id = 0;
static int nvpflush_timer_busy = 0;
static int nvpflush_daemon_active = 0;
static kthread_t *nvpflush_thr_id = 0;

/* daemon handshake state, protected by nvpflush_lock */
static int do_nvpflush = 0;
static int nvpbusy = 0;
static kmutex_t nvpflush_lock;
static kcondvar_t nvpflush_cv;
static kthread_id_t nvpflush_thread;
static clock_t nvpticks;

static void nvpflush_daemon(void);

#ifdef	DEBUG
int nvpdaemon_debug = 0;
int kfio_debug = 0;
#endif	/* DEBUG */

extern int modrootloaded;
extern void mdi_read_devices_files(void);
extern void mdi_clean_vhcache(void);
extern int sys_shutdown;
216 * Initialize the overall cache file management
218 void
219 i_ddi_devices_init(void)
221 list_create(&nvf_cache_files, sizeof (nvfd_t),
222 offsetof(nvfd_t, nvf_link));
223 list_create(&nvf_dirty_files, sizeof (nvfd_t),
224 offsetof(nvfd_t, nvf_link));
225 mutex_init(&nvf_cache_mutex, NULL, MUTEX_DEFAULT, NULL);
226 retire_store_init();
227 devid_cache_init();
231 * Read cache files
232 * The files read here should be restricted to those
233 * that may be required to mount root.
235 void
236 i_ddi_read_devices_files(void)
239 * The retire store should be the first file read as it may need to
240 * offline devices. kfio_disable_read is not used for retire. For
241 * the rationale see the tunable ddi_retire_store_bypass and
242 * comments in: kernel/os/retire_store.c
245 retire_store_read();
247 if (!kfio_disable_read) {
248 mdi_read_devices_files();
249 devid_cache_read();
253 void
254 i_ddi_start_flush_daemon(void)
256 nvfd_t *nvfdp;
258 ASSERT(i_ddi_io_initialized());
260 mutex_init(&nvpflush_lock, NULL, MUTEX_DRIVER, NULL);
261 cv_init(&nvpflush_cv, NULL, CV_DRIVER, NULL);
263 mutex_enter(&nvf_cache_mutex);
264 for (nvfdp = list_head(&nvf_cache_files); nvfdp;
265 nvfdp = list_next(&nvf_cache_files, nvfdp)) {
266 if (NVF_IS_DIRTY(nvfdp)) {
267 nvf_wake_daemon();
268 break;
271 mutex_exit(&nvf_cache_mutex);
/*
 * Discard cached device data held by the built-in clients.
 */
void
i_ddi_clean_devices_files(void)
{
	devid_cache_cleanup();
	mdi_clean_vhcache();
}
282 * Register a cache file to be managed and updated by the nvpflush daemon.
283 * All operations are performed through the returned handle.
284 * There is no unregister mechanism for now.
286 nvf_handle_t
287 nvf_register_file(nvf_ops_t *ops)
289 nvfd_t *nvfdp;
291 nvfdp = kmem_zalloc(sizeof (*nvfdp), KM_SLEEP);
293 nvfdp->nvf_ops = ops;
294 nvfdp->nvf_flags = 0;
295 rw_init(&nvfdp->nvf_lock, NULL, RW_DRIVER, NULL);
297 mutex_enter(&nvf_cache_mutex);
298 list_insert_tail(&nvf_cache_files, nvfdp);
299 mutex_exit(&nvf_cache_mutex);
301 return ((nvf_handle_t)nvfdp);
304 /*PRINTFLIKE1*/
305 void
306 nvf_error(const char *fmt, ...)
308 va_list ap;
310 if (kfio_report_error) {
311 va_start(ap, fmt);
312 vcmn_err(CE_NOTE, fmt, ap);
313 va_end(ap);
318 * Some operations clients may use to manage the data
319 * to be persisted in a cache file.
321 char *
322 nvf_cache_name(nvf_handle_t handle)
324 return (((nvfd_t *)handle)->nvf_cache_path);
327 krwlock_t *
328 nvf_lock(nvf_handle_t handle)
330 return (&(((nvfd_t *)handle)->nvf_lock));
333 list_t *
334 nvf_list(nvf_handle_t handle)
336 return (&(((nvfd_t *)handle)->nvf_data_list));
339 void
340 nvf_mark_dirty(nvf_handle_t handle)
342 ASSERT(RW_WRITE_HELD(&(((nvfd_t *)handle)->nvf_lock)));
343 NVF_MARK_DIRTY((nvfd_t *)handle);
347 nvf_is_dirty(nvf_handle_t handle)
349 ASSERT(RW_LOCK_HELD(&(((nvfd_t *)handle)->nvf_lock)));
350 return (NVF_IS_DIRTY((nvfd_t *)handle));
353 static uint16_t
354 nvp_cksum(uchar_t *buf, int64_t buflen)
356 uint16_t cksum = 0;
357 uint16_t *p = (uint16_t *)buf;
358 int64_t n;
360 if ((buflen & 0x01) != 0) {
361 buflen--;
362 cksum = buf[buflen];
364 n = buflen / 2;
365 while (n-- > 0)
366 cksum ^= *p++;
367 return (cksum);
371 fread_nvlist(char *filename, nvlist_t **ret_nvlist)
373 struct _buf *file;
374 nvpf_hdr_t hdr;
375 char *buf;
376 nvlist_t *nvl;
377 int rval;
378 uint_t offset;
379 int n;
380 char c;
381 uint16_t cksum, hdrsum;
383 *ret_nvlist = NULL;
385 file = kobj_open_file(filename);
386 if (file == (struct _buf *)-1) {
387 KFDEBUG((CE_CONT, "cannot open file: %s\n", filename));
388 return (ENOENT);
391 offset = 0;
392 n = kobj_read_file(file, (char *)&hdr, sizeof (hdr), offset);
393 if (n != sizeof (hdr)) {
394 kobj_close_file(file);
395 if (n < 0) {
396 nvf_error("error reading header: %s\n", filename);
397 return (EIO);
398 } else if (n == 0) {
399 KFDEBUG((CE_CONT, "file empty: %s\n", filename));
400 } else {
401 nvf_error("header size incorrect: %s\n", filename);
403 return (EINVAL);
405 offset += n;
407 KFDEBUG2((CE_CONT, "nvpf_magic: 0x%x\n", hdr.nvpf_magic));
408 KFDEBUG2((CE_CONT, "nvpf_version: %d\n", hdr.nvpf_version));
409 KFDEBUG2((CE_CONT, "nvpf_size: %lld\n",
410 (longlong_t)hdr.nvpf_size));
411 KFDEBUG2((CE_CONT, "nvpf_hdr_chksum: 0x%x\n",
412 hdr.nvpf_hdr_chksum));
413 KFDEBUG2((CE_CONT, "nvpf_chksum: 0x%x\n", hdr.nvpf_chksum));
415 cksum = hdr.nvpf_hdr_chksum;
416 hdr.nvpf_hdr_chksum = 0;
417 hdrsum = nvp_cksum((uchar_t *)&hdr, sizeof (hdr));
419 if (hdr.nvpf_magic != NVPF_HDR_MAGIC ||
420 hdr.nvpf_version != NVPF_HDR_VERSION || hdrsum != cksum) {
421 kobj_close_file(file);
422 if (hdrsum != cksum) {
423 nvf_error("%s: checksum error "
424 "(actual 0x%x, expected 0x%x)\n",
425 filename, hdrsum, cksum);
427 nvf_error("%s: header information incorrect", filename);
428 return (EINVAL);
431 ASSERT(hdr.nvpf_size >= 0);
433 buf = kmem_alloc(hdr.nvpf_size, KM_SLEEP);
434 n = kobj_read_file(file, buf, hdr.nvpf_size, offset);
435 if (n != hdr.nvpf_size) {
436 kmem_free(buf, hdr.nvpf_size);
437 kobj_close_file(file);
438 if (n < 0) {
439 nvf_error("%s: read error %d", filename, n);
440 } else {
441 nvf_error("%s: incomplete read %d/%lld",
442 filename, n, (longlong_t)hdr.nvpf_size);
444 return (EINVAL);
446 offset += n;
448 rval = kobj_read_file(file, &c, 1, offset);
449 kobj_close_file(file);
450 if (rval > 0) {
451 nvf_error("%s is larger than %lld\n",
452 filename, (longlong_t)hdr.nvpf_size);
453 kmem_free(buf, hdr.nvpf_size);
454 return (EINVAL);
457 cksum = nvp_cksum((uchar_t *)buf, hdr.nvpf_size);
458 if (hdr.nvpf_chksum != cksum) {
459 nvf_error("%s: checksum error (actual 0x%x, expected 0x%x)\n",
460 filename, hdr.nvpf_chksum, cksum);
461 kmem_free(buf, hdr.nvpf_size);
462 return (EINVAL);
465 nvl = NULL;
466 rval = nvlist_unpack(buf, hdr.nvpf_size, &nvl, 0);
467 if (rval != 0) {
468 nvf_error("%s: error %d unpacking nvlist\n",
469 filename, rval);
470 kmem_free(buf, hdr.nvpf_size);
471 return (EINVAL);
474 kmem_free(buf, hdr.nvpf_size);
475 *ret_nvlist = nvl;
476 return (0);
479 static int
480 kfcreate(char *filename, kfile_t **kfilep)
482 kfile_t *fp;
483 int rval;
485 ASSERT(modrootloaded);
487 fp = kmem_alloc(sizeof (kfile_t), KM_SLEEP);
489 fp->kf_vnflags = FCREAT | FWRITE | FTRUNC;
490 fp->kf_fname = filename;
491 fp->kf_fpos = 0;
492 fp->kf_state = 0;
494 KFDEBUG((CE_CONT, "create: %s flags 0x%x\n",
495 filename, fp->kf_vnflags));
496 rval = vn_open(filename, UIO_SYSSPACE, fp->kf_vnflags,
497 0444, &fp->kf_vp, CRCREAT, 0);
498 if (rval != 0) {
499 kmem_free(fp, sizeof (kfile_t));
500 KFDEBUG((CE_CONT, "%s: create error %d\n",
501 filename, rval));
502 return (rval);
505 *kfilep = fp;
506 return (0);
509 static int
510 kfremove(char *filename)
512 int rval;
514 KFDEBUG((CE_CONT, "remove: %s\n", filename));
515 rval = vn_remove(filename, UIO_SYSSPACE, RMFILE);
516 if (rval != 0) {
517 KFDEBUG((CE_CONT, "%s: remove error %d\n",
518 filename, rval));
520 return (rval);
523 static int
524 kfread(kfile_t *fp, char *buf, ssize_t bufsiz, ssize_t *ret_n)
526 ssize_t resid;
527 int err;
528 ssize_t n;
530 ASSERT(modrootloaded);
532 if (fp->kf_state != 0)
533 return (fp->kf_state);
535 err = vn_rdwr(UIO_READ, fp->kf_vp, buf, bufsiz, fp->kf_fpos,
536 UIO_SYSSPACE, 0, (rlim64_t)0, kcred, &resid);
537 if (err != 0) {
538 KFDEBUG((CE_CONT, "%s: read error %d\n",
539 fp->kf_fname, err));
540 fp->kf_state = err;
541 return (err);
544 ASSERT(resid >= 0 && resid <= bufsiz);
545 n = bufsiz - resid;
547 KFDEBUG1((CE_CONT, "%s: read %ld bytes ok %ld bufsiz, %ld resid\n",
548 fp->kf_fname, n, bufsiz, resid));
550 fp->kf_fpos += n;
551 *ret_n = n;
552 return (0);
555 static int
556 kfwrite(kfile_t *fp, char *buf, ssize_t bufsiz, ssize_t *ret_n)
558 rlim64_t rlimit;
559 ssize_t resid;
560 int err;
561 ssize_t len;
562 ssize_t n = 0;
564 ASSERT(modrootloaded);
566 if (fp->kf_state != 0)
567 return (fp->kf_state);
569 len = bufsiz;
570 rlimit = bufsiz + 1;
571 for (;;) {
572 err = vn_rdwr(UIO_WRITE, fp->kf_vp, buf, len, fp->kf_fpos,
573 UIO_SYSSPACE, FSYNC, rlimit, kcred, &resid);
574 if (err) {
575 KFDEBUG((CE_CONT, "%s: write error %d\n",
576 fp->kf_fname, err));
577 fp->kf_state = err;
578 return (err);
581 KFDEBUG1((CE_CONT, "%s: write %ld bytes ok %ld resid\n",
582 fp->kf_fname, len-resid, resid));
584 ASSERT(resid >= 0 && resid <= len);
586 n += (len - resid);
587 if (resid == 0)
588 break;
590 if (resid == len) {
591 KFDEBUG((CE_CONT, "%s: filesystem full?\n",
592 fp->kf_fname));
593 fp->kf_state = ENOSPC;
594 return (ENOSPC);
597 len -= resid;
598 buf += len;
599 fp->kf_fpos += len;
600 len = resid;
603 ASSERT(n == bufsiz);
604 KFDEBUG1((CE_CONT, "%s: wrote %ld bytes ok\n", fp->kf_fname, n));
606 *ret_n = n;
607 return (0);
611 static int
612 kfclose(kfile_t *fp)
614 int rval;
616 KFDEBUG((CE_CONT, "close: %s\n", fp->kf_fname));
618 if ((fp->kf_vnflags & FWRITE) && fp->kf_state == 0) {
619 rval = fop_fsync(fp->kf_vp, FSYNC, kcred, NULL);
620 if (rval != 0) {
621 nvf_error("%s: sync error %d\n",
622 fp->kf_fname, rval);
624 KFDEBUG((CE_CONT, "%s: sync ok\n", fp->kf_fname));
627 rval = fop_close(fp->kf_vp, fp->kf_vnflags, 1,
628 (offset_t)0, kcred, NULL);
629 if (rval != 0) {
630 if (fp->kf_state == 0) {
631 nvf_error("%s: close error %d\n",
632 fp->kf_fname, rval);
634 } else {
635 if (fp->kf_state == 0)
636 KFDEBUG((CE_CONT, "%s: close ok\n", fp->kf_fname));
639 VN_RELE(fp->kf_vp);
640 kmem_free(fp, sizeof (kfile_t));
641 return (rval);
644 static int
645 kfrename(char *oldname, char *newname)
647 int rval;
649 ASSERT(modrootloaded);
651 KFDEBUG((CE_CONT, "renaming %s to %s\n", oldname, newname));
653 if ((rval = vn_rename(oldname, newname, UIO_SYSSPACE)) != 0) {
654 KFDEBUG((CE_CONT, "rename %s to %s: %d\n",
655 oldname, newname, rval));
658 return (rval);
662 fwrite_nvlist(char *filename, nvlist_t *nvl)
664 char *buf;
665 char *nvbuf;
666 kfile_t *fp;
667 char *newname;
668 int len, err, err1;
669 size_t buflen;
670 ssize_t n;
672 ASSERT(modrootloaded);
674 nvbuf = NULL;
675 err = nvlist_pack(nvl, &nvbuf, &buflen, NV_ENCODE_NATIVE, 0);
676 if (err != 0) {
677 nvf_error("%s: error %d packing nvlist\n",
678 filename, err);
679 return (err);
682 buf = kmem_alloc(sizeof (nvpf_hdr_t) + buflen, KM_SLEEP);
683 bzero(buf, sizeof (nvpf_hdr_t));
685 ((nvpf_hdr_t *)buf)->nvpf_magic = NVPF_HDR_MAGIC;
686 ((nvpf_hdr_t *)buf)->nvpf_version = NVPF_HDR_VERSION;
687 ((nvpf_hdr_t *)buf)->nvpf_size = buflen;
688 ((nvpf_hdr_t *)buf)->nvpf_chksum = nvp_cksum((uchar_t *)nvbuf, buflen);
689 ((nvpf_hdr_t *)buf)->nvpf_hdr_chksum =
690 nvp_cksum((uchar_t *)buf, sizeof (nvpf_hdr_t));
692 bcopy(nvbuf, buf + sizeof (nvpf_hdr_t), buflen);
693 kmem_free(nvbuf, buflen);
694 buflen += sizeof (nvpf_hdr_t);
696 len = strlen(filename) + MAX_SUFFIX_LEN + 2;
697 newname = kmem_alloc(len, KM_SLEEP);
700 (void) sprintf(newname, "%s.%s", filename, NEW_FILENAME_SUFFIX);
703 * To make it unlikely we suffer data loss, write
704 * data to the new temporary file. Once successful
705 * complete the transaction by renaming the new file
706 * to replace the previous.
709 if ((err = kfcreate(newname, &fp)) == 0) {
710 err = kfwrite(fp, buf, buflen, &n);
711 if (err) {
712 nvf_error("%s: write error - %d\n",
713 newname, err);
714 } else {
715 if (n != buflen) {
716 nvf_error(
717 "%s: partial write %ld of %ld bytes\n",
718 newname, n, buflen);
719 nvf_error("%s: filesystem may be full?\n",
720 newname);
721 err = EIO;
724 if ((err1 = kfclose(fp)) != 0) {
725 nvf_error("%s: close error\n", newname);
726 if (err == 0)
727 err = err1;
729 if (err != 0) {
730 if (kfremove(newname) != 0) {
731 nvf_error("%s: remove failed\n",
732 newname);
735 } else {
736 nvf_error("%s: create failed - %d\n", filename, err);
739 if (err == 0) {
740 if ((err = kfrename(newname, filename)) != 0) {
741 nvf_error("%s: rename from %s failed\n",
742 newname, filename);
746 kmem_free(newname, len);
747 kmem_free(buf, buflen);
749 return (err);
752 static int
753 e_fwrite_nvlist(nvfd_t *nvfd, nvlist_t *nvl)
755 int err;
757 if ((err = fwrite_nvlist(nvfd->nvf_cache_path, nvl)) == 0)
758 return (DDI_SUCCESS);
759 else {
760 if (err == EROFS)
761 NVF_MARK_READONLY(nvfd);
762 return (DDI_FAILURE);
766 static void
767 nvp_list_free(nvfd_t *nvf)
769 ASSERT(RW_WRITE_HELD(&nvf->nvf_lock));
770 (nvf->nvf_list_free)((nvf_handle_t)nvf);
771 ASSERT(RW_WRITE_HELD(&nvf->nvf_lock));
775 * Read a file in the nvlist format
776 * EIO - i/o error during read
777 * ENOENT - file not found
778 * EINVAL - file contents corrupted
780 static int
781 fread_nvp_list(nvfd_t *nvfd)
783 nvlist_t *nvl;
784 nvpair_t *nvp;
785 char *name;
786 nvlist_t *sublist;
787 int rval;
788 int rv;
790 ASSERT(RW_WRITE_HELD(&(nvfd->nvf_lock)));
792 rval = fread_nvlist(nvfd->nvf_cache_path, &nvl);
793 if (rval != 0)
794 return (rval);
795 ASSERT(nvl != NULL);
797 nvp = NULL;
798 while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
799 name = nvpair_name(nvp);
800 ASSERT(strlen(name) > 0);
802 switch (nvpair_type(nvp)) {
803 case DATA_TYPE_NVLIST:
804 rval = nvpair_value_nvlist(nvp, &sublist);
805 if (rval != 0) {
806 nvf_error(
807 "nvpair_value_nvlist error %s %d\n",
808 name, rval);
809 goto error;
813 * unpack nvlist for this device and
814 * add elements to data list.
816 ASSERT(RW_WRITE_HELD(&(nvfd->nvf_lock)));
817 rv = (nvfd->nvf_unpack_nvlist)
818 ((nvf_handle_t)nvfd, sublist, name);
819 ASSERT(RW_WRITE_HELD(&(nvfd->nvf_lock)));
820 if (rv != 0) {
821 nvf_error(
822 "%s: %s invalid list element\n",
823 nvfd->nvf_cache_path, name);
824 rval = EINVAL;
825 goto error;
827 break;
829 default:
830 nvf_error("%s: %s unsupported data type %d\n",
831 nvfd->nvf_cache_path, name, nvpair_type(nvp));
832 rval = EINVAL;
833 goto error;
837 nvlist_free(nvl);
839 return (0);
841 error:
842 nvlist_free(nvl);
843 nvp_list_free(nvfd);
844 return (rval);
849 nvf_read_file(nvf_handle_t nvf_handle)
851 nvfd_t *nvfd = (nvfd_t *)nvf_handle;
852 int rval;
854 ASSERT(RW_WRITE_HELD(&nvfd->nvf_lock));
856 if (kfio_disable_read)
857 return (0);
859 KFDEBUG((CE_CONT, "reading %s\n", nvfd->nvf_cache_path));
861 rval = fread_nvp_list(nvfd);
862 if (rval) {
863 switch (rval) {
864 case EIO:
865 nvfd->nvf_flags |= NVF_F_REBUILD_MSG;
866 cmn_err(CE_WARN, "%s: I/O error",
867 nvfd->nvf_cache_path);
868 break;
869 case ENOENT:
870 nvfd->nvf_flags |= NVF_F_CREATE_MSG;
871 nvf_error("%s: not found\n",
872 nvfd->nvf_cache_path);
873 break;
874 case EINVAL:
875 default:
876 nvfd->nvf_flags |= NVF_F_REBUILD_MSG;
877 cmn_err(CE_WARN, "%s: data file corrupted",
878 nvfd->nvf_cache_path);
879 break;
882 return (rval);
885 static void
886 nvf_write_is_complete(nvfd_t *fd)
888 if (fd->nvf_write_complete) {
889 (fd->nvf_write_complete)((nvf_handle_t)fd);
893 /*ARGSUSED*/
894 static void
895 nvpflush_timeout(void *arg)
897 clock_t nticks;
899 mutex_enter(&nvpflush_lock);
900 nticks = nvpticks - ddi_get_lbolt();
901 if (nticks > 4) {
902 nvpflush_timer_busy = 1;
903 mutex_exit(&nvpflush_lock);
904 nvpflush_id = timeout(nvpflush_timeout, NULL, nticks);
905 } else {
906 do_nvpflush = 1;
907 NVPDAEMON_DEBUG((CE_CONT, "signal nvpdaemon\n"));
908 cv_signal(&nvpflush_cv);
909 nvpflush_id = 0;
910 nvpflush_timer_busy = 0;
911 mutex_exit(&nvpflush_lock);
916 * After marking a list as dirty, wake the nvpflush daemon
917 * to perform the update.
919 void
920 nvf_wake_daemon(void)
922 clock_t nticks;
925 * If the system isn't up yet or is shutting down,
926 * don't even think about starting a flush.
928 if (!i_ddi_io_initialized() || sys_shutdown)
929 return;
931 mutex_enter(&nvpflush_lock);
933 if (nvpflush_daemon_active == 0) {
934 nvpflush_daemon_active = 1;
935 mutex_exit(&nvpflush_lock);
936 NVPDAEMON_DEBUG((CE_CONT, "starting nvpdaemon thread\n"));
937 nvpflush_thr_id = thread_create(NULL, 0,
938 (void (*)())nvpflush_daemon,
939 NULL, 0, &p0, TS_RUN, minclsyspri);
940 mutex_enter(&nvpflush_lock);
943 nticks = nvpflush_delay * TICKS_PER_SECOND;
944 nvpticks = ddi_get_lbolt() + nticks;
945 if (nvpflush_timer_busy == 0) {
946 nvpflush_timer_busy = 1;
947 mutex_exit(&nvpflush_lock);
948 nvpflush_id = timeout(nvpflush_timeout, NULL, nticks + 4);
949 } else
950 mutex_exit(&nvpflush_lock);
953 static int
954 nvpflush_one(nvfd_t *nvfd)
956 int rval = DDI_SUCCESS;
957 nvlist_t *nvl;
959 rw_enter(&nvfd->nvf_lock, RW_READER);
961 ASSERT((nvfd->nvf_flags & NVF_F_FLUSHING) == 0);
963 if (!NVF_IS_DIRTY(nvfd) ||
964 NVF_IS_READONLY(nvfd) || kfio_disable_write || sys_shutdown) {
965 NVF_CLEAR_DIRTY(nvfd);
966 rw_exit(&nvfd->nvf_lock);
967 return (DDI_SUCCESS);
970 if (rw_tryupgrade(&nvfd->nvf_lock) == 0) {
971 nvf_error("nvpflush: "
972 "%s rw upgrade failed\n", nvfd->nvf_cache_path);
973 rw_exit(&nvfd->nvf_lock);
974 return (DDI_FAILURE);
976 if (((nvfd->nvf_pack_list)
977 ((nvf_handle_t)nvfd, &nvl)) != DDI_SUCCESS) {
978 nvf_error("nvpflush: "
979 "%s nvlist construction failed\n", nvfd->nvf_cache_path);
980 ASSERT(RW_WRITE_HELD(&nvfd->nvf_lock));
981 rw_exit(&nvfd->nvf_lock);
982 return (DDI_FAILURE);
984 ASSERT(RW_WRITE_HELD(&nvfd->nvf_lock));
986 NVF_CLEAR_DIRTY(nvfd);
987 nvfd->nvf_flags |= NVF_F_FLUSHING;
988 rw_exit(&nvfd->nvf_lock);
990 rval = e_fwrite_nvlist(nvfd, nvl);
991 nvlist_free(nvl);
993 rw_enter(&nvfd->nvf_lock, RW_WRITER);
994 nvfd->nvf_flags &= ~NVF_F_FLUSHING;
995 if (rval == DDI_FAILURE) {
996 if (NVF_IS_READONLY(nvfd)) {
997 rval = DDI_SUCCESS;
998 nvfd->nvf_flags &= ~(NVF_F_ERROR | NVF_F_DIRTY);
999 } else if ((nvfd->nvf_flags & NVF_F_ERROR) == 0) {
1000 cmn_err(CE_CONT,
1001 "%s: update failed\n", nvfd->nvf_cache_path);
1002 nvfd->nvf_flags |= NVF_F_ERROR | NVF_F_DIRTY;
1004 } else {
1005 if (nvfd->nvf_flags & NVF_F_CREATE_MSG) {
1006 cmn_err(CE_CONT,
1007 "!Creating %s\n", nvfd->nvf_cache_path);
1008 nvfd->nvf_flags &= ~NVF_F_CREATE_MSG;
1010 if (nvfd->nvf_flags & NVF_F_REBUILD_MSG) {
1011 cmn_err(CE_CONT,
1012 "!Rebuilding %s\n", nvfd->nvf_cache_path);
1013 nvfd->nvf_flags &= ~NVF_F_REBUILD_MSG;
1015 if (nvfd->nvf_flags & NVF_F_ERROR) {
1016 cmn_err(CE_CONT,
1017 "%s: update now ok\n", nvfd->nvf_cache_path);
1018 nvfd->nvf_flags &= ~NVF_F_ERROR;
1021 * The file may need to be flushed again if the cached
1022 * data was touched while writing the earlier contents.
1024 if (NVF_IS_DIRTY(nvfd))
1025 rval = DDI_FAILURE;
1028 rw_exit(&nvfd->nvf_lock);
1029 return (rval);
1033 static void
1034 nvpflush_daemon(void)
1036 callb_cpr_t cprinfo;
1037 nvfd_t *nvfdp, *nextfdp;
1038 clock_t clk;
1039 int rval;
1040 int want_wakeup;
1041 int is_now_clean;
1043 ASSERT(modrootloaded);
1045 nvpflush_thread = curthread;
1046 NVPDAEMON_DEBUG((CE_CONT, "nvpdaemon: init\n"));
1048 CALLB_CPR_INIT(&cprinfo, &nvpflush_lock, callb_generic_cpr, "nvp");
1049 mutex_enter(&nvpflush_lock);
1050 for (;;) {
1051 CALLB_CPR_SAFE_BEGIN(&cprinfo);
1052 while (do_nvpflush == 0) {
1053 clk = cv_reltimedwait(&nvpflush_cv, &nvpflush_lock,
1054 (nvpdaemon_idle_time * TICKS_PER_SECOND),
1055 TR_CLOCK_TICK);
1056 if ((clk == -1 && do_nvpflush == 0 &&
1057 nvpflush_timer_busy == 0) || sys_shutdown) {
1059 * Note that CALLB_CPR_EXIT calls mutex_exit()
1060 * on the lock passed in to CALLB_CPR_INIT,
1061 * so the lock must be held when invoking it.
1063 CALLB_CPR_SAFE_END(&cprinfo, &nvpflush_lock);
1064 NVPDAEMON_DEBUG((CE_CONT, "nvpdaemon: exit\n"));
1065 ASSERT(mutex_owned(&nvpflush_lock));
1066 nvpflush_thr_id = NULL;
1067 nvpflush_daemon_active = 0;
1068 CALLB_CPR_EXIT(&cprinfo);
1069 thread_exit();
1072 CALLB_CPR_SAFE_END(&cprinfo, &nvpflush_lock);
1074 nvpbusy = 1;
1075 want_wakeup = 0;
1076 do_nvpflush = 0;
1077 mutex_exit(&nvpflush_lock);
1080 * Try flushing what's dirty, reschedule if there's
1081 * a failure or data gets marked as dirty again.
1082 * First move each file marked dirty to the dirty
1083 * list to avoid locking the list across the write.
1085 mutex_enter(&nvf_cache_mutex);
1086 for (nvfdp = list_head(&nvf_cache_files);
1087 nvfdp; nvfdp = nextfdp) {
1088 nextfdp = list_next(&nvf_cache_files, nvfdp);
1089 rw_enter(&nvfdp->nvf_lock, RW_READER);
1090 if (NVF_IS_DIRTY(nvfdp)) {
1091 list_remove(&nvf_cache_files, nvfdp);
1092 list_insert_tail(&nvf_dirty_files, nvfdp);
1093 rw_exit(&nvfdp->nvf_lock);
1094 } else {
1095 NVPDAEMON_DEBUG((CE_CONT,
1096 "nvpdaemon: not dirty %s\n",
1097 nvfdp->nvf_cache_path));
1098 rw_exit(&nvfdp->nvf_lock);
1101 mutex_exit(&nvf_cache_mutex);
1104 * Now go through the dirty list
1106 for (nvfdp = list_head(&nvf_dirty_files);
1107 nvfdp; nvfdp = nextfdp) {
1108 nextfdp = list_next(&nvf_dirty_files, nvfdp);
1110 is_now_clean = 0;
1111 rw_enter(&nvfdp->nvf_lock, RW_READER);
1112 if (NVF_IS_DIRTY(nvfdp)) {
1113 NVPDAEMON_DEBUG((CE_CONT,
1114 "nvpdaemon: flush %s\n",
1115 nvfdp->nvf_cache_path));
1116 rw_exit(&nvfdp->nvf_lock);
1117 rval = nvpflush_one(nvfdp);
1118 rw_enter(&nvfdp->nvf_lock, RW_READER);
1119 if (rval != DDI_SUCCESS ||
1120 NVF_IS_DIRTY(nvfdp)) {
1121 rw_exit(&nvfdp->nvf_lock);
1122 NVPDAEMON_DEBUG((CE_CONT,
1123 "nvpdaemon: %s dirty again\n",
1124 nvfdp->nvf_cache_path));
1125 want_wakeup = 1;
1126 } else {
1127 rw_exit(&nvfdp->nvf_lock);
1128 nvf_write_is_complete(nvfdp);
1129 is_now_clean = 1;
1131 } else {
1132 NVPDAEMON_DEBUG((CE_CONT,
1133 "nvpdaemon: not dirty %s\n",
1134 nvfdp->nvf_cache_path));
1135 rw_exit(&nvfdp->nvf_lock);
1136 is_now_clean = 1;
1139 if (is_now_clean) {
1140 mutex_enter(&nvf_cache_mutex);
1141 list_remove(&nvf_dirty_files, nvfdp);
1142 list_insert_tail(&nvf_cache_files,
1143 nvfdp);
1144 mutex_exit(&nvf_cache_mutex);
1148 if (want_wakeup)
1149 nvf_wake_daemon();
1151 mutex_enter(&nvpflush_lock);
1152 nvpbusy = 0;