4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
27 #include <sys/t_lock.h>
28 #include <sys/cmn_err.h>
29 #include <sys/instance.h>
33 #include <sys/hwconf.h>
34 #include <sys/sunddi.h>
35 #include <sys/sunndi.h>
36 #include <sys/ddi_impldefs.h>
37 #include <sys/ndi_impldefs.h>
38 #include <sys/modctl.h>
40 #include <sys/promif.h>
41 #include <sys/cpuvar.h>
42 #include <sys/pathname.h>
44 #include <sys/devcache.h>
45 #include <sys/devcache_impl.h>
46 #include <sys/sysmacros.h>
47 #include <sys/varargs.h>
48 #include <sys/callb.h>
51 * This facility provides interfaces to clients to register,
52 * read and update cache data in persisted backing store files,
53 * usually in /etc/devices. The data persisted through this
54 * mechanism should be stateless data, functioning in the sense
55 * of a cache. Writes are performed by a background daemon
56 * thread, permitting a client to schedule an update without
57 * blocking, then continue updating the data state in
58 * parallel. The data is only locked by the daemon thread
59 * to pack the data in preparation for the write.
61 * Data persisted through this mechanism should be capable
62 * of being regenerated through normal system operation,
63 * for example attaching all disk devices would cause all
64 * devids to be registered for those devices. By caching
65 * a devid-device tuple, the system can operate in a
66 * more optimal way, directly attaching the device mapped
67 * to a devid, rather than burdensomely driving attach of
68 * the entire device tree to discover a single device.
70 * Note that a client should only need to include
71 * <sys/devcache.h> for the supported interfaces.
73 * The data per client is entirely within the control of
74 * the client. When reading, data unpacked from the backing
75 * store should be inserted in the list. The pointer to
76 * the list can be retrieved via nvf_list(). When writing,
77 * the data on the list is to be packed and returned to the
78 * nvpdaemon as an nvlist.
80 * Obvious restrictions are imposed by the limits of the
81 * nvlist format. The data cannot be read or written
82 * piecemeal, and large amounts of data aren't recommended.
83 * However, nvlists do allow that data be named and typed
84 * and can be size-of-int invariant, and the cached data
85 * can be versioned conveniently.
87 * The registration involves two steps: a handle is
88 * allocated by calling the registration function.
89 * This sets up the data referenced by the handle and
90 * initializes the lock. Following registration, the
91 * client must initialize the data list. The list
92 * interfaces require that the list element with offset
93 * to the node link be provided. The format of the
94 * list element is under the control of the client.
96 * Locking: the address of the data list r/w lock provided
97 * can be accessed with nvf_lock(). The lock must be held
98 * as reader when traversing the list or checking state,
99 * such as nvf_is_dirty(). The lock must be held as
100 * writer when updating the list or marking it dirty.
101 * The lock must not be held when waking the daemon.
103 * The data r/w lock is held as writer when the pack,
104 * unpack and free list handlers are called. The
105 * lock should not be dropped and must be still held
106 * upon return. The client should also hold the lock
107 * as reader when checking if the list is dirty, and
108 * as writer when marking the list dirty or initiating
111 * The asynchronous nature of updates allows for the
112 * possibility that the data may continue to be updated
113 * once the daemon has been notified that an update is
114 * desired. The data only needs to be locked against
115 * updates when packing the data into the form to be
116 * written. When the write of the packed data has
117 * completed, the daemon will automatically reschedule
118 * an update if the data was marked dirty after the
119 * point at which it was packed. Before beginning an
120 * update, the daemon attempts to lock the data as
121 * writer; if the writer lock is already held, it
122 * backs off and retries later. The model is to give
123 * priority to the kernel processes generating the
124 * data, and that the nature of the data is that
125 * it does not change often, can be re-generated when
126 * needed, so updates should not happen often and
127 * can be delayed until the data stops changing.
128 * The client may update the list or mark it dirty
129 * any time it is able to acquire the lock as
132 * A failed write will be retried after some delay,
133 * in the hope that the cause of the error will be
134 * transient, for example a filesystem with no space
135 * available. An update on a read-only filesystem
136 * is failed silently and not retried; this would be
137 * the case when booted off install media.
139 * There is no unregister mechanism as of yet, as it
140 * hasn't been needed so far.
144 * Global list of files registered and updated by the nvpflush
145 * daemon, protected by the nvf_cache_mutex. While an
146 * update is taking place, a file is temporarily moved to
147 * the dirty list to avoid locking the primary list for
148 * the duration of the update.
150 list_t nvf_cache_files
;
151 list_t nvf_dirty_files
;
152 kmutex_t nvf_cache_mutex
;
156 * Allow some delay from an update of the data before flushing
157 * to permit simultaneous updates of multiple changes.
158 * Changes in the data are expected to be bursty, ie
159 * reconfig or hot-plug of a new adapter.
161 * kfio_report_error (default 0)
162 * Set to 1 to enable some error messages related to low-level
163 * kernel file i/o operations.
165 * nvpflush_delay (default 10)
166 * The number of seconds after data is marked dirty before the
167 * flush daemon is triggered to flush the data. A longer period
168 * of time permits more data updates per write. Note that
169 * every update resets the timer so no repository write will
170 * occur while data is being updated continuously.
172 * nvpdaemon_idle_time (default 60)
173 * The number of seconds the daemon will sleep idle before exiting.
/* Default flush-delay and daemon idle-exit tunable values (seconds) */
#define	NVPFLUSH_DELAY		10
#define	NVPDAEMON_IDLE_TIME	60

/* Conversion from seconds to clock ticks for timeout(9F) arithmetic */
#define	TICKS_PER_SECOND	(drv_usectohz(1000000))
184 int kfio_report_error
= 0; /* kernel file i/o operations */
185 int kfio_disable_read
= 0; /* disable all reads */
186 int kfio_disable_write
= 0; /* disable all writes */
188 int nvpflush_delay
= NVPFLUSH_DELAY
;
189 int nvpdaemon_idle_time
= NVPDAEMON_IDLE_TIME
;
191 static timeout_id_t nvpflush_id
= 0;
192 static int nvpflush_timer_busy
= 0;
193 static int nvpflush_daemon_active
= 0;
194 static kthread_t
*nvpflush_thr_id
= 0;
196 static int do_nvpflush
= 0;
197 static int nvpbusy
= 0;
198 static kmutex_t nvpflush_lock
;
199 static kcondvar_t nvpflush_cv
;
200 static kthread_id_t nvpflush_thread
;
201 static clock_t nvpticks
;
203 static void nvpflush_daemon(void);
206 int nvpdaemon_debug
= 0;
210 extern int modrootloaded
;
211 extern void mdi_read_devices_files(void);
212 extern void mdi_clean_vhcache(void);
213 extern int sys_shutdown
;
216 * Initialize the overall cache file management
219 i_ddi_devices_init(void)
221 list_create(&nvf_cache_files
, sizeof (nvfd_t
),
222 offsetof(nvfd_t
, nvf_link
));
223 list_create(&nvf_dirty_files
, sizeof (nvfd_t
),
224 offsetof(nvfd_t
, nvf_link
));
225 mutex_init(&nvf_cache_mutex
, NULL
, MUTEX_DEFAULT
, NULL
);
232 * The files read here should be restricted to those
233 * that may be required to mount root.
236 i_ddi_read_devices_files(void)
239 * The retire store should be the first file read as it
240 * may need to offline devices. kfio_disable_read is not
241 * used for retire. For the rationale see the tunable
242 * ddi_retire_store_bypass and comments in:
243 * uts/common/os/retire_store.c
248 if (!kfio_disable_read
) {
249 mdi_read_devices_files();
255 i_ddi_start_flush_daemon(void)
259 ASSERT(i_ddi_io_initialized());
261 mutex_init(&nvpflush_lock
, NULL
, MUTEX_DRIVER
, NULL
);
262 cv_init(&nvpflush_cv
, NULL
, CV_DRIVER
, NULL
);
264 mutex_enter(&nvf_cache_mutex
);
265 for (nvfdp
= list_head(&nvf_cache_files
); nvfdp
;
266 nvfdp
= list_next(&nvf_cache_files
, nvfdp
)) {
267 if (NVF_IS_DIRTY(nvfdp
)) {
272 mutex_exit(&nvf_cache_mutex
);
/*
 * Release cached in-core data for the managed caches at shutdown.
 */
void
i_ddi_clean_devices_files(void)
{
	devid_cache_cleanup();
	/*
	 * NOTE(review): source truncated; upstream also cleans the
	 * vHCI cache here via mdi_clean_vhcache() — verify.
	 */
	mdi_clean_vhcache();
}
283 * Register a cache file to be managed and updated by the nvpflush daemon.
284 * All operations are performed through the returned handle.
285 * There is no unregister mechanism for now.
288 nvf_register_file(nvf_ops_t
*ops
)
292 nvfdp
= kmem_zalloc(sizeof (*nvfdp
), KM_SLEEP
);
294 nvfdp
->nvf_ops
= ops
;
295 nvfdp
->nvf_flags
= 0;
296 rw_init(&nvfdp
->nvf_lock
, NULL
, RW_DRIVER
, NULL
);
298 mutex_enter(&nvf_cache_mutex
);
299 list_insert_tail(&nvf_cache_files
, nvfdp
);
300 mutex_exit(&nvf_cache_mutex
);
302 return ((nvf_handle_t
)nvfdp
);
307 nvf_error(const char *fmt
, ...)
311 if (kfio_report_error
) {
313 vcmn_err(CE_NOTE
, fmt
, ap
);
319 * Some operations clients may use to manage the data
320 * to be persisted in a cache file.
323 nvf_cache_name(nvf_handle_t handle
)
325 return (((nvfd_t
*)handle
)->nvf_cache_path
);
329 nvf_lock(nvf_handle_t handle
)
331 return (&(((nvfd_t
*)handle
)->nvf_lock
));
335 nvf_list(nvf_handle_t handle
)
337 return (&(((nvfd_t
*)handle
)->nvf_data_list
));
341 nvf_mark_dirty(nvf_handle_t handle
)
343 ASSERT(RW_WRITE_HELD(&(((nvfd_t
*)handle
)->nvf_lock
)));
344 NVF_MARK_DIRTY((nvfd_t
*)handle
);
348 nvf_is_dirty(nvf_handle_t handle
)
350 ASSERT(RW_LOCK_HELD(&(((nvfd_t
*)handle
)->nvf_lock
)));
351 return (NVF_IS_DIRTY((nvfd_t
*)handle
));
355 nvp_cksum(uchar_t
*buf
, int64_t buflen
)
358 uint16_t *p
= (uint16_t *)buf
;
361 if ((buflen
& 0x01) != 0) {
372 fread_nvlist(char *filename
, nvlist_t
**ret_nvlist
)
382 uint16_t cksum
, hdrsum
;
386 file
= kobj_open_file(filename
);
387 if (file
== (struct _buf
*)-1) {
388 KFDEBUG((CE_CONT
, "cannot open file: %s\n", filename
));
393 n
= kobj_read_file(file
, (char *)&hdr
, sizeof (hdr
), offset
);
394 if (n
!= sizeof (hdr
)) {
395 kobj_close_file(file
);
397 nvf_error("error reading header: %s\n", filename
);
400 KFDEBUG((CE_CONT
, "file empty: %s\n", filename
));
402 nvf_error("header size incorrect: %s\n", filename
);
408 KFDEBUG2((CE_CONT
, "nvpf_magic: 0x%x\n", hdr
.nvpf_magic
));
409 KFDEBUG2((CE_CONT
, "nvpf_version: %d\n", hdr
.nvpf_version
));
410 KFDEBUG2((CE_CONT
, "nvpf_size: %lld\n",
411 (longlong_t
)hdr
.nvpf_size
));
412 KFDEBUG2((CE_CONT
, "nvpf_hdr_chksum: 0x%x\n",
413 hdr
.nvpf_hdr_chksum
));
414 KFDEBUG2((CE_CONT
, "nvpf_chksum: 0x%x\n", hdr
.nvpf_chksum
));
416 cksum
= hdr
.nvpf_hdr_chksum
;
417 hdr
.nvpf_hdr_chksum
= 0;
418 hdrsum
= nvp_cksum((uchar_t
*)&hdr
, sizeof (hdr
));
420 if (hdr
.nvpf_magic
!= NVPF_HDR_MAGIC
||
421 hdr
.nvpf_version
!= NVPF_HDR_VERSION
|| hdrsum
!= cksum
) {
422 kobj_close_file(file
);
423 if (hdrsum
!= cksum
) {
424 nvf_error("%s: checksum error "
425 "(actual 0x%x, expected 0x%x)\n",
426 filename
, hdrsum
, cksum
);
428 nvf_error("%s: header information incorrect", filename
);
432 ASSERT(hdr
.nvpf_size
>= 0);
434 buf
= kmem_alloc(hdr
.nvpf_size
, KM_SLEEP
);
435 n
= kobj_read_file(file
, buf
, hdr
.nvpf_size
, offset
);
436 if (n
!= hdr
.nvpf_size
) {
437 kmem_free(buf
, hdr
.nvpf_size
);
438 kobj_close_file(file
);
440 nvf_error("%s: read error %d", filename
, n
);
442 nvf_error("%s: incomplete read %d/%lld",
443 filename
, n
, (longlong_t
)hdr
.nvpf_size
);
449 rval
= kobj_read_file(file
, &c
, 1, offset
);
450 kobj_close_file(file
);
452 nvf_error("%s is larger than %lld\n",
453 filename
, (longlong_t
)hdr
.nvpf_size
);
454 kmem_free(buf
, hdr
.nvpf_size
);
458 cksum
= nvp_cksum((uchar_t
*)buf
, hdr
.nvpf_size
);
459 if (hdr
.nvpf_chksum
!= cksum
) {
460 nvf_error("%s: checksum error (actual 0x%x, expected 0x%x)\n",
461 filename
, hdr
.nvpf_chksum
, cksum
);
462 kmem_free(buf
, hdr
.nvpf_size
);
467 rval
= nvlist_unpack(buf
, hdr
.nvpf_size
, &nvl
, 0);
469 nvf_error("%s: error %d unpacking nvlist\n",
471 kmem_free(buf
, hdr
.nvpf_size
);
475 kmem_free(buf
, hdr
.nvpf_size
);
481 kfcreate(char *filename
, kfile_t
**kfilep
)
486 ASSERT(modrootloaded
);
488 fp
= kmem_alloc(sizeof (kfile_t
), KM_SLEEP
);
490 fp
->kf_vnflags
= FCREAT
| FWRITE
| FTRUNC
;
491 fp
->kf_fname
= filename
;
495 KFDEBUG((CE_CONT
, "create: %s flags 0x%x\n",
496 filename
, fp
->kf_vnflags
));
497 rval
= vn_open(filename
, UIO_SYSSPACE
, fp
->kf_vnflags
,
498 0444, &fp
->kf_vp
, CRCREAT
, 0);
500 kmem_free(fp
, sizeof (kfile_t
));
501 KFDEBUG((CE_CONT
, "%s: create error %d\n",
511 kfremove(char *filename
)
515 KFDEBUG((CE_CONT
, "remove: %s\n", filename
));
516 rval
= vn_remove(filename
, UIO_SYSSPACE
, RMFILE
);
518 KFDEBUG((CE_CONT
, "%s: remove error %d\n",
525 kfread(kfile_t
*fp
, char *buf
, ssize_t bufsiz
, ssize_t
*ret_n
)
531 ASSERT(modrootloaded
);
533 if (fp
->kf_state
!= 0)
534 return (fp
->kf_state
);
536 err
= vn_rdwr(UIO_READ
, fp
->kf_vp
, buf
, bufsiz
, fp
->kf_fpos
,
537 UIO_SYSSPACE
, 0, (rlim64_t
)0, kcred
, &resid
);
539 KFDEBUG((CE_CONT
, "%s: read error %d\n",
545 ASSERT(resid
>= 0 && resid
<= bufsiz
);
548 KFDEBUG1((CE_CONT
, "%s: read %ld bytes ok %ld bufsiz, %ld resid\n",
549 fp
->kf_fname
, n
, bufsiz
, resid
));
557 kfwrite(kfile_t
*fp
, char *buf
, ssize_t bufsiz
, ssize_t
*ret_n
)
565 ASSERT(modrootloaded
);
567 if (fp
->kf_state
!= 0)
568 return (fp
->kf_state
);
573 err
= vn_rdwr(UIO_WRITE
, fp
->kf_vp
, buf
, len
, fp
->kf_fpos
,
574 UIO_SYSSPACE
, FSYNC
, rlimit
, kcred
, &resid
);
576 KFDEBUG((CE_CONT
, "%s: write error %d\n",
582 KFDEBUG1((CE_CONT
, "%s: write %ld bytes ok %ld resid\n",
583 fp
->kf_fname
, len
-resid
, resid
));
585 ASSERT(resid
>= 0 && resid
<= len
);
592 KFDEBUG((CE_CONT
, "%s: filesystem full?\n",
594 fp
->kf_state
= ENOSPC
;
605 KFDEBUG1((CE_CONT
, "%s: wrote %ld bytes ok\n", fp
->kf_fname
, n
));
617 KFDEBUG((CE_CONT
, "close: %s\n", fp
->kf_fname
));
619 if ((fp
->kf_vnflags
& FWRITE
) && fp
->kf_state
== 0) {
620 rval
= VOP_FSYNC(fp
->kf_vp
, FSYNC
, kcred
, NULL
);
622 nvf_error("%s: sync error %d\n",
625 KFDEBUG((CE_CONT
, "%s: sync ok\n", fp
->kf_fname
));
628 rval
= VOP_CLOSE(fp
->kf_vp
, fp
->kf_vnflags
, 1,
629 (offset_t
)0, kcred
, NULL
);
631 if (fp
->kf_state
== 0) {
632 nvf_error("%s: close error %d\n",
636 if (fp
->kf_state
== 0)
637 KFDEBUG((CE_CONT
, "%s: close ok\n", fp
->kf_fname
));
641 kmem_free(fp
, sizeof (kfile_t
));
646 kfrename(char *oldname
, char *newname
)
650 ASSERT(modrootloaded
);
652 KFDEBUG((CE_CONT
, "renaming %s to %s\n", oldname
, newname
));
654 if ((rval
= vn_rename(oldname
, newname
, UIO_SYSSPACE
)) != 0) {
655 KFDEBUG((CE_CONT
, "rename %s to %s: %d\n",
656 oldname
, newname
, rval
));
663 fwrite_nvlist(char *filename
, nvlist_t
*nvl
)
673 ASSERT(modrootloaded
);
676 err
= nvlist_pack(nvl
, &nvbuf
, &buflen
, NV_ENCODE_NATIVE
, 0);
678 nvf_error("%s: error %d packing nvlist\n",
683 buf
= kmem_alloc(sizeof (nvpf_hdr_t
) + buflen
, KM_SLEEP
);
684 bzero(buf
, sizeof (nvpf_hdr_t
));
686 ((nvpf_hdr_t
*)buf
)->nvpf_magic
= NVPF_HDR_MAGIC
;
687 ((nvpf_hdr_t
*)buf
)->nvpf_version
= NVPF_HDR_VERSION
;
688 ((nvpf_hdr_t
*)buf
)->nvpf_size
= buflen
;
689 ((nvpf_hdr_t
*)buf
)->nvpf_chksum
= nvp_cksum((uchar_t
*)nvbuf
, buflen
);
690 ((nvpf_hdr_t
*)buf
)->nvpf_hdr_chksum
=
691 nvp_cksum((uchar_t
*)buf
, sizeof (nvpf_hdr_t
));
693 bcopy(nvbuf
, buf
+ sizeof (nvpf_hdr_t
), buflen
);
694 kmem_free(nvbuf
, buflen
);
695 buflen
+= sizeof (nvpf_hdr_t
);
697 len
= strlen(filename
) + MAX_SUFFIX_LEN
+ 2;
698 newname
= kmem_alloc(len
, KM_SLEEP
);
701 (void) sprintf(newname
, "%s.%s", filename
, NEW_FILENAME_SUFFIX
);
704 * To make it unlikely we suffer data loss, write
705 * data to the new temporary file. Once successful
706 * complete the transaction by renaming the new file
707 * to replace the previous.
710 if ((err
= kfcreate(newname
, &fp
)) == 0) {
711 err
= kfwrite(fp
, buf
, buflen
, &n
);
713 nvf_error("%s: write error - %d\n",
718 "%s: partial write %ld of %ld bytes\n",
720 nvf_error("%s: filesystem may be full?\n",
725 if ((err1
= kfclose(fp
)) != 0) {
726 nvf_error("%s: close error\n", newname
);
731 if (kfremove(newname
) != 0) {
732 nvf_error("%s: remove failed\n",
737 nvf_error("%s: create failed - %d\n", filename
, err
);
741 if ((err
= kfrename(newname
, filename
)) != 0) {
742 nvf_error("%s: rename from %s failed\n",
747 kmem_free(newname
, len
);
748 kmem_free(buf
, buflen
);
754 e_fwrite_nvlist(nvfd_t
*nvfd
, nvlist_t
*nvl
)
758 if ((err
= fwrite_nvlist(nvfd
->nvf_cache_path
, nvl
)) == 0)
759 return (DDI_SUCCESS
);
762 NVF_MARK_READONLY(nvfd
);
763 return (DDI_FAILURE
);
768 nvp_list_free(nvfd_t
*nvf
)
770 ASSERT(RW_WRITE_HELD(&nvf
->nvf_lock
));
771 (nvf
->nvf_list_free
)((nvf_handle_t
)nvf
);
772 ASSERT(RW_WRITE_HELD(&nvf
->nvf_lock
));
776 * Read a file in the nvlist format
777 * EIO - i/o error during read
778 * ENOENT - file not found
779 * EINVAL - file contents corrupted
782 fread_nvp_list(nvfd_t
*nvfd
)
791 ASSERT(RW_WRITE_HELD(&(nvfd
->nvf_lock
)));
793 rval
= fread_nvlist(nvfd
->nvf_cache_path
, &nvl
);
799 while ((nvp
= nvlist_next_nvpair(nvl
, nvp
)) != NULL
) {
800 name
= nvpair_name(nvp
);
801 ASSERT(strlen(name
) > 0);
803 switch (nvpair_type(nvp
)) {
804 case DATA_TYPE_NVLIST
:
805 rval
= nvpair_value_nvlist(nvp
, &sublist
);
808 "nvpair_value_nvlist error %s %d\n",
814 * unpack nvlist for this device and
815 * add elements to data list.
817 ASSERT(RW_WRITE_HELD(&(nvfd
->nvf_lock
)));
818 rv
= (nvfd
->nvf_unpack_nvlist
)
819 ((nvf_handle_t
)nvfd
, sublist
, name
);
820 ASSERT(RW_WRITE_HELD(&(nvfd
->nvf_lock
)));
823 "%s: %s invalid list element\n",
824 nvfd
->nvf_cache_path
, name
);
831 nvf_error("%s: %s unsupported data type %d\n",
832 nvfd
->nvf_cache_path
, name
, nvpair_type(nvp
));
850 nvf_read_file(nvf_handle_t nvf_handle
)
852 nvfd_t
*nvfd
= (nvfd_t
*)nvf_handle
;
855 ASSERT(RW_WRITE_HELD(&nvfd
->nvf_lock
));
857 if (kfio_disable_read
)
860 KFDEBUG((CE_CONT
, "reading %s\n", nvfd
->nvf_cache_path
));
862 rval
= fread_nvp_list(nvfd
);
866 nvfd
->nvf_flags
|= NVF_F_REBUILD_MSG
;
867 cmn_err(CE_WARN
, "%s: I/O error",
868 nvfd
->nvf_cache_path
);
871 nvfd
->nvf_flags
|= NVF_F_CREATE_MSG
;
872 nvf_error("%s: not found\n",
873 nvfd
->nvf_cache_path
);
877 nvfd
->nvf_flags
|= NVF_F_REBUILD_MSG
;
878 cmn_err(CE_WARN
, "%s: data file corrupted",
879 nvfd
->nvf_cache_path
);
887 nvf_write_is_complete(nvfd_t
*fd
)
889 if (fd
->nvf_write_complete
) {
890 (fd
->nvf_write_complete
)((nvf_handle_t
)fd
);
896 nvpflush_timeout(void *arg
)
900 mutex_enter(&nvpflush_lock
);
901 nticks
= nvpticks
- ddi_get_lbolt();
903 nvpflush_timer_busy
= 1;
904 mutex_exit(&nvpflush_lock
);
905 nvpflush_id
= timeout(nvpflush_timeout
, NULL
, nticks
);
908 NVPDAEMON_DEBUG((CE_CONT
, "signal nvpdaemon\n"));
909 cv_signal(&nvpflush_cv
);
911 nvpflush_timer_busy
= 0;
912 mutex_exit(&nvpflush_lock
);
917 * After marking a list as dirty, wake the nvpflush daemon
918 * to perform the update.
921 nvf_wake_daemon(void)
926 * If the system isn't up yet or is shutting down,
927 * don't even think about starting a flush.
929 if (!i_ddi_io_initialized() || sys_shutdown
)
932 mutex_enter(&nvpflush_lock
);
934 if (nvpflush_daemon_active
== 0) {
935 nvpflush_daemon_active
= 1;
936 mutex_exit(&nvpflush_lock
);
937 NVPDAEMON_DEBUG((CE_CONT
, "starting nvpdaemon thread\n"));
938 nvpflush_thr_id
= thread_create(NULL
, 0,
939 (void (*)())nvpflush_daemon
,
940 NULL
, 0, &p0
, TS_RUN
, minclsyspri
);
941 mutex_enter(&nvpflush_lock
);
944 nticks
= nvpflush_delay
* TICKS_PER_SECOND
;
945 nvpticks
= ddi_get_lbolt() + nticks
;
946 if (nvpflush_timer_busy
== 0) {
947 nvpflush_timer_busy
= 1;
948 mutex_exit(&nvpflush_lock
);
949 nvpflush_id
= timeout(nvpflush_timeout
, NULL
, nticks
+ 4);
951 mutex_exit(&nvpflush_lock
);
955 nvpflush_one(nvfd_t
*nvfd
)
957 int rval
= DDI_SUCCESS
;
960 rw_enter(&nvfd
->nvf_lock
, RW_READER
);
962 ASSERT((nvfd
->nvf_flags
& NVF_F_FLUSHING
) == 0);
964 if (!NVF_IS_DIRTY(nvfd
) ||
965 NVF_IS_READONLY(nvfd
) || kfio_disable_write
|| sys_shutdown
) {
966 NVF_CLEAR_DIRTY(nvfd
);
967 rw_exit(&nvfd
->nvf_lock
);
968 return (DDI_SUCCESS
);
971 if (rw_tryupgrade(&nvfd
->nvf_lock
) == 0) {
972 nvf_error("nvpflush: "
973 "%s rw upgrade failed\n", nvfd
->nvf_cache_path
);
974 rw_exit(&nvfd
->nvf_lock
);
975 return (DDI_FAILURE
);
977 if (((nvfd
->nvf_pack_list
)
978 ((nvf_handle_t
)nvfd
, &nvl
)) != DDI_SUCCESS
) {
979 nvf_error("nvpflush: "
980 "%s nvlist construction failed\n", nvfd
->nvf_cache_path
);
981 ASSERT(RW_WRITE_HELD(&nvfd
->nvf_lock
));
982 rw_exit(&nvfd
->nvf_lock
);
983 return (DDI_FAILURE
);
985 ASSERT(RW_WRITE_HELD(&nvfd
->nvf_lock
));
987 NVF_CLEAR_DIRTY(nvfd
);
988 nvfd
->nvf_flags
|= NVF_F_FLUSHING
;
989 rw_exit(&nvfd
->nvf_lock
);
991 rval
= e_fwrite_nvlist(nvfd
, nvl
);
994 rw_enter(&nvfd
->nvf_lock
, RW_WRITER
);
995 nvfd
->nvf_flags
&= ~NVF_F_FLUSHING
;
996 if (rval
== DDI_FAILURE
) {
997 if (NVF_IS_READONLY(nvfd
)) {
999 nvfd
->nvf_flags
&= ~(NVF_F_ERROR
| NVF_F_DIRTY
);
1000 } else if ((nvfd
->nvf_flags
& NVF_F_ERROR
) == 0) {
1002 "%s: update failed\n", nvfd
->nvf_cache_path
);
1003 nvfd
->nvf_flags
|= NVF_F_ERROR
| NVF_F_DIRTY
;
1006 if (nvfd
->nvf_flags
& NVF_F_CREATE_MSG
) {
1008 "!Creating %s\n", nvfd
->nvf_cache_path
);
1009 nvfd
->nvf_flags
&= ~NVF_F_CREATE_MSG
;
1011 if (nvfd
->nvf_flags
& NVF_F_REBUILD_MSG
) {
1013 "!Rebuilding %s\n", nvfd
->nvf_cache_path
);
1014 nvfd
->nvf_flags
&= ~NVF_F_REBUILD_MSG
;
1016 if (nvfd
->nvf_flags
& NVF_F_ERROR
) {
1018 "%s: update now ok\n", nvfd
->nvf_cache_path
);
1019 nvfd
->nvf_flags
&= ~NVF_F_ERROR
;
1022 * The file may need to be flushed again if the cached
1023 * data was touched while writing the earlier contents.
1025 if (NVF_IS_DIRTY(nvfd
))
1029 rw_exit(&nvfd
->nvf_lock
);
1035 nvpflush_daemon(void)
1037 callb_cpr_t cprinfo
;
1038 nvfd_t
*nvfdp
, *nextfdp
;
1044 ASSERT(modrootloaded
);
1046 nvpflush_thread
= curthread
;
1047 NVPDAEMON_DEBUG((CE_CONT
, "nvpdaemon: init\n"));
1049 CALLB_CPR_INIT(&cprinfo
, &nvpflush_lock
, callb_generic_cpr
, "nvp");
1050 mutex_enter(&nvpflush_lock
);
1052 CALLB_CPR_SAFE_BEGIN(&cprinfo
);
1053 while (do_nvpflush
== 0) {
1054 clk
= cv_reltimedwait(&nvpflush_cv
, &nvpflush_lock
,
1055 (nvpdaemon_idle_time
* TICKS_PER_SECOND
),
1057 if ((clk
== -1 && do_nvpflush
== 0 &&
1058 nvpflush_timer_busy
== 0) || sys_shutdown
) {
1060 * Note that CALLB_CPR_EXIT calls mutex_exit()
1061 * on the lock passed in to CALLB_CPR_INIT,
1062 * so the lock must be held when invoking it.
1064 CALLB_CPR_SAFE_END(&cprinfo
, &nvpflush_lock
);
1065 NVPDAEMON_DEBUG((CE_CONT
, "nvpdaemon: exit\n"));
1066 ASSERT(mutex_owned(&nvpflush_lock
));
1067 nvpflush_thr_id
= NULL
;
1068 nvpflush_daemon_active
= 0;
1069 CALLB_CPR_EXIT(&cprinfo
);
1073 CALLB_CPR_SAFE_END(&cprinfo
, &nvpflush_lock
);
1078 mutex_exit(&nvpflush_lock
);
1081 * Try flushing what's dirty, reschedule if there's
1082 * a failure or data gets marked as dirty again.
1083 * First move each file marked dirty to the dirty
1084 * list to avoid locking the list across the write.
1086 mutex_enter(&nvf_cache_mutex
);
1087 for (nvfdp
= list_head(&nvf_cache_files
);
1088 nvfdp
; nvfdp
= nextfdp
) {
1089 nextfdp
= list_next(&nvf_cache_files
, nvfdp
);
1090 rw_enter(&nvfdp
->nvf_lock
, RW_READER
);
1091 if (NVF_IS_DIRTY(nvfdp
)) {
1092 list_remove(&nvf_cache_files
, nvfdp
);
1093 list_insert_tail(&nvf_dirty_files
, nvfdp
);
1094 rw_exit(&nvfdp
->nvf_lock
);
1096 NVPDAEMON_DEBUG((CE_CONT
,
1097 "nvpdaemon: not dirty %s\n",
1098 nvfdp
->nvf_cache_path
));
1099 rw_exit(&nvfdp
->nvf_lock
);
1102 mutex_exit(&nvf_cache_mutex
);
1105 * Now go through the dirty list
1107 for (nvfdp
= list_head(&nvf_dirty_files
);
1108 nvfdp
; nvfdp
= nextfdp
) {
1109 nextfdp
= list_next(&nvf_dirty_files
, nvfdp
);
1112 rw_enter(&nvfdp
->nvf_lock
, RW_READER
);
1113 if (NVF_IS_DIRTY(nvfdp
)) {
1114 NVPDAEMON_DEBUG((CE_CONT
,
1115 "nvpdaemon: flush %s\n",
1116 nvfdp
->nvf_cache_path
));
1117 rw_exit(&nvfdp
->nvf_lock
);
1118 rval
= nvpflush_one(nvfdp
);
1119 rw_enter(&nvfdp
->nvf_lock
, RW_READER
);
1120 if (rval
!= DDI_SUCCESS
||
1121 NVF_IS_DIRTY(nvfdp
)) {
1122 rw_exit(&nvfdp
->nvf_lock
);
1123 NVPDAEMON_DEBUG((CE_CONT
,
1124 "nvpdaemon: %s dirty again\n",
1125 nvfdp
->nvf_cache_path
));
1128 rw_exit(&nvfdp
->nvf_lock
);
1129 nvf_write_is_complete(nvfdp
);
1133 NVPDAEMON_DEBUG((CE_CONT
,
1134 "nvpdaemon: not dirty %s\n",
1135 nvfdp
->nvf_cache_path
));
1136 rw_exit(&nvfdp
->nvf_lock
);
1141 mutex_enter(&nvf_cache_mutex
);
1142 list_remove(&nvf_dirty_files
, nvfdp
);
1143 list_insert_tail(&nvf_cache_files
,
1145 mutex_exit(&nvf_cache_mutex
);
1152 mutex_enter(&nvpflush_lock
);