1 /* $NetBSD: fss.c,v 1.64 2009/10/13 12:37:19 hannken Exp $ */
4 * Copyright (c) 2003 The NetBSD Foundation, Inc.
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Juergen Hannken-Illjes.
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
33 * File system snapshot disk driver.
35 * Block/character interface to the snapshot of a mounted file system.
38 #include <sys/cdefs.h>
39 __KERNEL_RCSID(0, "$NetBSD: fss.c,v 1.64 2009/10/13 12:37:19 hannken Exp $");
41 #include <sys/param.h>
42 #include <sys/systm.h>
43 #include <sys/namei.h>
45 #include <sys/errno.h>
46 #include <sys/malloc.h>
48 #include <sys/ioctl.h>
49 #include <sys/disklabel.h>
50 #include <sys/device.h>
53 #include <sys/mount.h>
54 #include <sys/vnode.h>
58 #include <sys/kthread.h>
59 #include <sys/fstrans.h>
60 #include <sys/simplelock.h>
62 #include <miscfs/specfs/specdev.h>
64 #include <dev/fssvar.h>
70 dev_type_open(fss_open
);
71 dev_type_close(fss_close
);
72 dev_type_read(fss_read
);
73 dev_type_write(fss_write
);
74 dev_type_ioctl(fss_ioctl
);
75 dev_type_strategy(fss_strategy
);
76 dev_type_dump(fss_dump
);
77 dev_type_size(fss_size
);
79 static void fss_unmount_hook(struct mount
*);
80 static int fss_copy_on_write(void *, struct buf
*, bool);
81 static inline void fss_error(struct fss_softc
*, const char *);
82 static int fss_create_files(struct fss_softc
*, struct fss_set
*,
83 off_t
*, struct lwp
*);
84 static int fss_create_snapshot(struct fss_softc
*, struct fss_set
*,
86 static int fss_delete_snapshot(struct fss_softc
*, struct lwp
*);
87 static int fss_softc_alloc(struct fss_softc
*);
88 static void fss_softc_free(struct fss_softc
*);
89 static int fss_read_cluster(struct fss_softc
*, u_int32_t
);
90 static void fss_bs_thread(void *);
91 static int fss_bs_io(struct fss_softc
*, fss_io_type
,
92 u_int32_t
, off_t
, int, void *);
93 static u_int32_t
*fss_bs_indir(struct fss_softc
*, u_int32_t
);
95 static kmutex_t fss_device_lock
; /* Protect all units. */
96 static int fss_num_attached
= 0; /* Number of attached devices. */
97 static struct vfs_hooks fss_vfs_hooks
= {
98 .vh_unmount
= fss_unmount_hook
101 const struct bdevsw fss_bdevsw
= {
102 fss_open
, fss_close
, fss_strategy
, fss_ioctl
,
103 fss_dump
, fss_size
, D_DISK
| D_MPSAFE
106 const struct cdevsw fss_cdevsw
= {
107 fss_open
, fss_close
, fss_read
, fss_write
, fss_ioctl
,
108 nostop
, notty
, nopoll
, nommap
, nokqfilter
, D_DISK
| D_MPSAFE
111 static int fss_match(device_t
, cfdata_t
, void *);
112 static void fss_attach(device_t
, device_t
, void *);
113 static int fss_detach(device_t
, int);
115 CFATTACH_DECL_NEW(fss
, sizeof(struct fss_softc
),
116 fss_match
, fss_attach
, fss_detach
, NULL
);
117 extern struct cfdriver fss_cd
;
123 mutex_init(&fss_device_lock
, MUTEX_DEFAULT
, IPL_NONE
);
124 if (config_cfattach_attach(fss_cd
.cd_name
, &fss_ca
))
125 aprint_error("%s: unable to register\n", fss_cd
.cd_name
);
129 fss_match(device_t self
, cfdata_t cfdata
, void *aux
)
135 fss_attach(device_t parent
, device_t self
, void *aux
)
137 struct fss_softc
*sc
= device_private(self
);
141 mutex_init(&sc
->sc_slock
, MUTEX_DEFAULT
, IPL_NONE
);
142 mutex_init(&sc
->sc_lock
, MUTEX_DEFAULT
, IPL_NONE
);
143 cv_init(&sc
->sc_work_cv
, "fssbs");
144 cv_init(&sc
->sc_cache_cv
, "cowwait");
145 bufq_alloc(&sc
->sc_bufq
, "fcfs", 0);
146 sc
->sc_dkdev
= malloc(sizeof(*sc
->sc_dkdev
), M_DEVBUF
, M_WAITOK
);
147 sc
->sc_dkdev
->dk_info
= NULL
;
148 disk_init(sc
->sc_dkdev
, device_xname(self
), NULL
);
149 if (!pmf_device_register(self
, NULL
, NULL
))
150 aprint_error_dev(self
, "couldn't establish power handler\n");
152 if (fss_num_attached
++ == 0)
153 vfs_hooks_attach(&fss_vfs_hooks
);
157 fss_detach(device_t self
, int flags
)
159 struct fss_softc
*sc
= device_private(self
);
161 if (sc
->sc_flags
& FSS_ACTIVE
)
164 if (--fss_num_attached
== 0)
165 vfs_hooks_detach(&fss_vfs_hooks
);
167 pmf_device_deregister(self
);
168 mutex_destroy(&sc
->sc_slock
);
169 mutex_destroy(&sc
->sc_lock
);
170 cv_destroy(&sc
->sc_work_cv
);
171 cv_destroy(&sc
->sc_cache_cv
);
172 bufq_drain(sc
->sc_bufq
);
173 bufq_free(sc
->sc_bufq
);
174 disk_destroy(sc
->sc_dkdev
);
175 free(sc
->sc_dkdev
, M_DEVBUF
);
181 fss_open(dev_t dev
, int flags
, int mode
, struct lwp
*l
)
185 struct fss_softc
*sc
;
187 mflag
= (mode
== S_IFCHR
? FSS_CDEV_OPEN
: FSS_BDEV_OPEN
);
189 mutex_enter(&fss_device_lock
);
191 sc
= device_lookup_private(&fss_cd
, minor(dev
));
193 cf
= malloc(sizeof(*cf
), M_DEVBUF
, M_WAITOK
);
194 cf
->cf_name
= fss_cd
.cd_name
;
195 cf
->cf_atname
= fss_cd
.cd_name
;
196 cf
->cf_unit
= minor(dev
);
197 cf
->cf_fstate
= FSTATE_STAR
;
198 sc
= device_private(config_attach_pseudo(cf
));
203 mutex_enter(&sc
->sc_slock
);
205 sc
->sc_flags
|= mflag
;
207 mutex_exit(&sc
->sc_slock
);
208 mutex_exit(&fss_device_lock
);
214 fss_close(dev_t dev
, int flags
, int mode
, struct lwp
*l
)
218 struct fss_softc
*sc
= device_lookup_private(&fss_cd
, minor(dev
));
220 mflag
= (mode
== S_IFCHR
? FSS_CDEV_OPEN
: FSS_BDEV_OPEN
);
224 mutex_enter(&sc
->sc_slock
);
225 if ((sc
->sc_flags
& (FSS_CDEV_OPEN
|FSS_BDEV_OPEN
)) != mflag
) {
226 sc
->sc_flags
&= ~mflag
;
227 mutex_exit(&sc
->sc_slock
);
230 if ((sc
->sc_flags
& FSS_ACTIVE
) != 0 &&
231 (sc
->sc_uflags
& FSS_UNCONFIG_ON_CLOSE
) != 0) {
232 sc
->sc_uflags
&= ~FSS_UNCONFIG_ON_CLOSE
;
233 mutex_exit(&sc
->sc_slock
);
234 error
= fss_ioctl(dev
, FSSIOCCLR
, NULL
, FWRITE
, l
);
237 if ((sc
->sc_flags
& FSS_ACTIVE
) != 0) {
238 mutex_exit(&sc
->sc_slock
);
241 if (! mutex_tryenter(&fss_device_lock
)) {
242 mutex_exit(&sc
->sc_slock
);
246 KASSERT((sc
->sc_flags
& FSS_ACTIVE
) == 0);
247 KASSERT((sc
->sc_flags
& (FSS_CDEV_OPEN
|FSS_BDEV_OPEN
)) == mflag
);
248 mutex_exit(&sc
->sc_slock
);
249 cf
= device_cfdata(sc
->sc_dev
);
250 error
= config_detach(sc
->sc_dev
, DETACH_QUIET
);
253 mutex_exit(&fss_device_lock
);
259 fss_strategy(struct buf
*bp
)
261 const bool write
= ((bp
->b_flags
& B_READ
) != B_READ
);
262 struct fss_softc
*sc
= device_lookup_private(&fss_cd
, minor(bp
->b_dev
));
264 mutex_enter(&sc
->sc_slock
);
266 if (write
|| !FSS_ISVALID(sc
)) {
268 mutex_exit(&sc
->sc_slock
);
270 bp
->b_error
= (write
? EROFS
: ENXIO
);
271 bp
->b_resid
= bp
->b_bcount
;
276 bp
->b_rawblkno
= bp
->b_blkno
;
277 bufq_put(sc
->sc_bufq
, bp
);
278 cv_signal(&sc
->sc_work_cv
);
280 mutex_exit(&sc
->sc_slock
);
284 fss_read(dev_t dev
, struct uio
*uio
, int flags
)
286 return physio(fss_strategy
, NULL
, dev
, B_READ
, minphys
, uio
);
290 fss_write(dev_t dev
, struct uio
*uio
, int flags
)
292 return physio(fss_strategy
, NULL
, dev
, B_WRITE
, minphys
, uio
);
296 fss_ioctl(dev_t dev
, u_long cmd
, void *data
, int flag
, struct lwp
*l
)
299 struct fss_softc
*sc
= device_lookup_private(&fss_cd
, minor(dev
));
300 struct fss_set
*fss
= (struct fss_set
*)data
;
301 struct fss_get
*fsg
= (struct fss_get
*)data
;
305 mutex_enter(&sc
->sc_lock
);
306 if ((flag
& FWRITE
) == 0)
308 else if ((sc
->sc_flags
& FSS_ACTIVE
) != 0)
311 error
= fss_create_snapshot(sc
, fss
, l
);
312 mutex_exit(&sc
->sc_lock
);
316 mutex_enter(&sc
->sc_lock
);
317 if ((flag
& FWRITE
) == 0)
319 else if ((sc
->sc_flags
& FSS_ACTIVE
) == 0)
322 error
= fss_delete_snapshot(sc
, l
);
323 mutex_exit(&sc
->sc_lock
);
327 mutex_enter(&sc
->sc_lock
);
328 switch (sc
->sc_flags
& (FSS_PERSISTENT
| FSS_ACTIVE
)) {
330 memcpy(fsg
->fsg_mount
, sc
->sc_mntname
, MNAMELEN
);
331 fsg
->fsg_csize
= FSS_CLSIZE(sc
);
332 fsg
->fsg_time
= sc
->sc_time
;
333 fsg
->fsg_mount_size
= sc
->sc_clcount
;
334 fsg
->fsg_bs_size
= sc
->sc_clnext
;
337 case FSS_PERSISTENT
| FSS_ACTIVE
:
338 memcpy(fsg
->fsg_mount
, sc
->sc_mntname
, MNAMELEN
);
340 fsg
->fsg_time
= sc
->sc_time
;
341 fsg
->fsg_mount_size
= 0;
342 fsg
->fsg_bs_size
= 0;
349 mutex_exit(&sc
->sc_lock
);
353 mutex_enter(&sc
->sc_slock
);
354 sc
->sc_uflags
= *(int *)data
;
355 mutex_exit(&sc
->sc_slock
);
360 mutex_enter(&sc
->sc_slock
);
361 *(int *)data
= sc
->sc_uflags
;
362 mutex_exit(&sc
->sc_slock
);
381 fss_dump(dev_t dev
, daddr_t blkno
, void *va
,
388 * An error occurred reading or writing the snapshot or backing store.
389 * If this is the first error, log it to the console.
390 * The caller holds the mutex.
393 fss_error(struct fss_softc
*sc
, const char *msg
)
396 if ((sc
->sc_flags
& (FSS_ACTIVE
|FSS_ERROR
)) == FSS_ACTIVE
)
397 aprint_error_dev(sc
->sc_dev
, "snapshot invalid: %s\n", msg
);
398 if ((sc
->sc_flags
& FSS_ACTIVE
) == FSS_ACTIVE
)
399 sc
->sc_flags
|= FSS_ERROR
;
403 * Allocate the variable sized parts of the softc and
404 * fork the kernel thread.
406 * The fields sc_clcount, sc_clshift, sc_cache_size and sc_indir_size
407 * must be initialized before this is called.
410 fss_softc_alloc(struct fss_softc
*sc
)
414 if ((sc
->sc_flags
& FSS_PERSISTENT
) == 0) {
416 kmem_zalloc(howmany(sc
->sc_clcount
, NBBY
), KM_SLEEP
);
417 if (sc
->sc_copied
== NULL
)
420 sc
->sc_cache
= kmem_alloc(sc
->sc_cache_size
*
421 sizeof(struct fss_cache
), KM_SLEEP
);
422 if (sc
->sc_cache
== NULL
)
425 for (i
= 0; i
< sc
->sc_cache_size
; i
++) {
426 sc
->sc_cache
[i
].fc_type
= FSS_CACHE_FREE
;
427 sc
->sc_cache
[i
].fc_data
=
428 kmem_alloc(FSS_CLSIZE(sc
), KM_SLEEP
);
429 if (sc
->sc_cache
[i
].fc_data
== NULL
)
431 cv_init(&sc
->sc_cache
[i
].fc_state_cv
, "cowwait1");
435 kmem_zalloc(howmany(sc
->sc_indir_size
, NBBY
), KM_SLEEP
);
436 if (sc
->sc_indir_valid
== NULL
)
439 sc
->sc_indir_data
= kmem_zalloc(FSS_CLSIZE(sc
), KM_SLEEP
);
440 if (sc
->sc_indir_data
== NULL
)
443 sc
->sc_copied
= NULL
;
445 sc
->sc_indir_valid
= NULL
;
446 sc
->sc_indir_data
= NULL
;
449 sc
->sc_flags
|= FSS_BS_THREAD
;
450 if ((error
= kthread_create(PRI_BIO
, 0, NULL
, fss_bs_thread
, sc
,
451 &sc
->sc_bs_lwp
, device_xname(sc
->sc_dev
))) != 0) {
452 sc
->sc_flags
&= ~FSS_BS_THREAD
;
456 disk_attach(sc
->sc_dkdev
);
462 * Free the variable sized parts of the softc.
465 fss_softc_free(struct fss_softc
*sc
)
469 if ((sc
->sc_flags
& FSS_BS_THREAD
) != 0) {
470 mutex_enter(&sc
->sc_slock
);
471 sc
->sc_flags
&= ~FSS_BS_THREAD
;
472 cv_signal(&sc
->sc_work_cv
);
473 while (sc
->sc_bs_lwp
!= NULL
)
474 kpause("fssdetach", false, 1, &sc
->sc_slock
);
475 mutex_exit(&sc
->sc_slock
);
478 disk_detach(sc
->sc_dkdev
);
480 if (sc
->sc_copied
!= NULL
)
481 kmem_free(sc
->sc_copied
, howmany(sc
->sc_clcount
, NBBY
));
482 sc
->sc_copied
= NULL
;
484 if (sc
->sc_cache
!= NULL
) {
485 for (i
= 0; i
< sc
->sc_cache_size
; i
++)
486 if (sc
->sc_cache
[i
].fc_data
!= NULL
) {
487 cv_destroy(&sc
->sc_cache
[i
].fc_state_cv
);
488 kmem_free(sc
->sc_cache
[i
].fc_data
,
491 kmem_free(sc
->sc_cache
,
492 sc
->sc_cache_size
*sizeof(struct fss_cache
));
496 if (sc
->sc_indir_valid
!= NULL
)
497 kmem_free(sc
->sc_indir_valid
, howmany(sc
->sc_indir_size
, NBBY
));
498 sc
->sc_indir_valid
= NULL
;
500 if (sc
->sc_indir_data
!= NULL
)
501 kmem_free(sc
->sc_indir_data
, FSS_CLSIZE(sc
));
502 sc
->sc_indir_data
= NULL
;
506 * Set all active snapshots on this file system into ERROR state.
509 fss_unmount_hook(struct mount
*mp
)
512 struct fss_softc
*sc
;
514 mutex_enter(&fss_device_lock
);
515 for (i
= 0; i
< fss_cd
.cd_ndevs
; i
++) {
516 if ((sc
= device_lookup_private(&fss_cd
, i
)) == NULL
)
518 mutex_enter(&sc
->sc_slock
);
519 if ((sc
->sc_flags
& FSS_ACTIVE
) != 0 &&
521 fss_error(sc
, "forced unmount");
522 mutex_exit(&sc
->sc_slock
);
524 mutex_exit(&fss_device_lock
);
528 * A buffer is written to the snapshotted block device. Copy to
529 * backing store if needed.
532 fss_copy_on_write(void *v
, struct buf
*bp
, bool data_valid
)
536 struct fss_softc
*sc
= v
;
538 mutex_enter(&sc
->sc_slock
);
539 if (!FSS_ISVALID(sc
)) {
540 mutex_exit(&sc
->sc_slock
);
544 cl
= FSS_BTOCL(sc
, dbtob(bp
->b_blkno
));
545 ch
= FSS_BTOCL(sc
, dbtob(bp
->b_blkno
)+bp
->b_bcount
-1);
547 if (curlwp
== uvm
.pagedaemon_lwp
) {
548 for (c
= cl
; c
<= ch
; c
++)
549 if (isclr(sc
->sc_copied
, c
)) {
554 mutex_exit(&sc
->sc_slock
);
557 for (c
= cl
; c
<= ch
; c
++) {
558 error
= fss_read_cluster(sc
, c
);
567 * Lookup and open needed files.
569 * For a file system internal snapshot, initializes sc_mntname, sc_mount,
570 * sc_bs_vp and sc_time.
572 * Otherwise returns dev and size of the underlying block device and
573 * initializes sc_mntname, sc_mount, sc_bdev and sc_bs_vp.
576 fss_create_files(struct fss_softc
*sc
, struct fss_set
*fss
,
577 off_t
*bsize
, struct lwp
*l
)
579 int error
, bits
, fsbsize
;
581 struct partinfo dpart
;
583 /* nd -> nd2 to reduce mistakes while updating only some namei calls */
584 struct nameidata nd2
;
588 * Get the mounted file system.
591 error
= namei_simple_user(fss
->fss_mount
,
592 NSM_FOLLOW_NOEMULROOT
, &vp
);
596 if ((vp
->v_vflag
& VV_ROOT
) != VV_ROOT
) {
601 sc
->sc_mount
= vp
->v_mount
;
602 memcpy(sc
->sc_mntname
, sc
->sc_mount
->mnt_stat
.f_mntonname
, MNAMELEN
);
607 * Check for file system internal snapshot.
610 error
= namei_simple_user(fss
->fss_bstore
,
611 NSM_FOLLOW_NOEMULROOT
, &vp
);
614 error
= vn_lock(vp
, LK_EXCLUSIVE
| LK_RETRY
);
620 if (vp
->v_type
== VREG
&& vp
->v_mount
== sc
->sc_mount
) {
621 sc
->sc_flags
|= FSS_PERSISTENT
;
624 fsbsize
= sc
->sc_bs_vp
->v_mount
->mnt_stat
.f_iosize
;
625 bits
= sizeof(sc
->sc_bs_bshift
)*NBBY
;
626 for (sc
->sc_bs_bshift
= 1; sc
->sc_bs_bshift
< bits
;
628 if (FSS_FSBSIZE(sc
) == fsbsize
)
630 if (sc
->sc_bs_bshift
>= bits
) {
631 VOP_UNLOCK(sc
->sc_bs_vp
, 0);
635 sc
->sc_bs_bmask
= FSS_FSBSIZE(sc
)-1;
638 error
= VFS_SNAPSHOT(sc
->sc_mount
, sc
->sc_bs_vp
, &ts
);
639 TIMESPEC_TO_TIMEVAL(&sc
->sc_time
, &ts
);
641 VOP_UNLOCK(sc
->sc_bs_vp
, 0);
648 * Get the block device it is mounted on.
651 error
= namei_simple_kernel(sc
->sc_mount
->mnt_stat
.f_mntfromname
,
652 NSM_FOLLOW_NOEMULROOT
, &vp
);
656 if (vp
->v_type
!= VBLK
) {
661 sc
->sc_bdev
= vp
->v_rdev
;
665 * Get the block device size.
668 error
= bdev_ioctl(sc
->sc_bdev
, DIOCGPART
, &dpart
, FREAD
, l
);
672 *bsize
= (off_t
)dpart
.disklab
->d_secsize
*dpart
.part
->p_size
;
675 * Get the backing store
678 NDINIT(&nd2
, LOOKUP
, FOLLOW
, UIO_USERSPACE
, fss
->fss_bstore
);
679 if ((error
= vn_open(&nd2
, FREAD
|FWRITE
, 0)) != 0)
681 VOP_UNLOCK(nd2
.ni_vp
, 0);
683 sc
->sc_bs_vp
= nd2
.ni_vp
;
685 if (nd2
.ni_vp
->v_type
!= VREG
&& nd2
.ni_vp
->v_type
!= VCHR
)
688 if (sc
->sc_bs_vp
->v_type
== VREG
) {
689 error
= VOP_GETATTR(sc
->sc_bs_vp
, &va
, l
->l_cred
);
692 sc
->sc_bs_size
= va
.va_size
;
693 fsbsize
= sc
->sc_bs_vp
->v_mount
->mnt_stat
.f_iosize
;
694 if (fsbsize
& (fsbsize
-1)) /* No power of two */
696 for (sc
->sc_bs_bshift
= 1; sc
->sc_bs_bshift
< 32;
698 if (FSS_FSBSIZE(sc
) == fsbsize
)
700 if (sc
->sc_bs_bshift
>= 32)
702 sc
->sc_bs_bmask
= FSS_FSBSIZE(sc
)-1;
704 sc
->sc_bs_bshift
= DEV_BSHIFT
;
705 sc
->sc_bs_bmask
= FSS_FSBSIZE(sc
)-1;
715 fss_create_snapshot(struct fss_softc
*sc
, struct fss_set
*fss
, struct lwp
*l
)
721 bsize
= 0; /* XXX gcc */
726 if ((error
= fss_create_files(sc
, fss
, &bsize
, l
)) != 0)
729 if (sc
->sc_flags
& FSS_PERSISTENT
) {
731 sc
->sc_flags
|= FSS_ACTIVE
;
736 * Set cluster size. Must be a power of two and
737 * a multiple of backing store block size.
739 if (fss
->fss_csize
<= 0)
742 csize
= fss
->fss_csize
;
743 if (bsize
/csize
> FSS_CLUSTER_MAX
)
744 csize
= bsize
/FSS_CLUSTER_MAX
+1;
746 for (sc
->sc_clshift
= sc
->sc_bs_bshift
; sc
->sc_clshift
< 32;
748 if (FSS_CLSIZE(sc
) >= csize
)
750 if (sc
->sc_clshift
>= 32) {
754 sc
->sc_clmask
= FSS_CLSIZE(sc
)-1;
757 * Set number of cache slots.
759 if (FSS_CLSIZE(sc
) <= 8192)
760 sc
->sc_cache_size
= 32;
761 else if (FSS_CLSIZE(sc
) <= 65536)
762 sc
->sc_cache_size
= 8;
764 sc
->sc_cache_size
= 4;
767 * Set number of clusters and size of last cluster.
769 sc
->sc_clcount
= FSS_BTOCL(sc
, bsize
-1)+1;
770 sc
->sc_clresid
= FSS_CLOFF(sc
, bsize
-1)+1;
773 * Set size of indirect table.
775 len
= sc
->sc_clcount
*sizeof(u_int32_t
);
776 sc
->sc_indir_size
= FSS_BTOCL(sc
, len
)+1;
777 sc
->sc_clnext
= sc
->sc_indir_size
;
778 sc
->sc_indir_cur
= 0;
780 if ((error
= fss_softc_alloc(sc
)) != 0)
784 * Activate the snapshot.
787 if ((error
= vfs_suspend(sc
->sc_mount
, 0)) != 0)
790 microtime(&sc
->sc_time
);
793 error
= fscow_establish(sc
->sc_mount
,
794 fss_copy_on_write
, sc
);
796 sc
->sc_flags
|= FSS_ACTIVE
;
798 vfs_resume(sc
->sc_mount
);
803 aprint_debug_dev(sc
->sc_dev
, "%s snapshot active\n", sc
->sc_mntname
);
804 aprint_debug_dev(sc
->sc_dev
,
805 "%u clusters of %u, %u cache slots, %u indir clusters\n",
806 sc
->sc_clcount
, FSS_CLSIZE(sc
),
807 sc
->sc_cache_size
, sc
->sc_indir_size
);
813 if (sc
->sc_bs_vp
!= NULL
) {
814 if (sc
->sc_flags
& FSS_PERSISTENT
)
815 vn_close(sc
->sc_bs_vp
, FREAD
, l
->l_cred
);
817 vn_close(sc
->sc_bs_vp
, FREAD
|FWRITE
, l
->l_cred
);
828 fss_delete_snapshot(struct fss_softc
*sc
, struct lwp
*l
)
831 if ((sc
->sc_flags
& FSS_PERSISTENT
) == 0)
832 fscow_disestablish(sc
->sc_mount
, fss_copy_on_write
, sc
);
834 mutex_enter(&sc
->sc_slock
);
835 sc
->sc_flags
&= ~(FSS_ACTIVE
|FSS_ERROR
);
838 mutex_exit(&sc
->sc_slock
);
841 if (sc
->sc_flags
& FSS_PERSISTENT
)
842 vn_close(sc
->sc_bs_vp
, FREAD
, l
->l_cred
);
844 vn_close(sc
->sc_bs_vp
, FREAD
|FWRITE
, l
->l_cred
);
846 sc
->sc_flags
&= ~FSS_PERSISTENT
;
852 * Read a cluster from the snapshotted block device to the cache.
855 fss_read_cluster(struct fss_softc
*sc
, u_int32_t cl
)
857 int error
, todo
, offset
, len
;
859 struct buf
*bp
, *mbp
;
860 struct fss_cache
*scp
, *scl
;
863 * Get a free cache slot.
865 scl
= sc
->sc_cache
+sc
->sc_cache_size
;
867 mutex_enter(&sc
->sc_slock
);
870 if (isset(sc
->sc_copied
, cl
) || !FSS_ISVALID(sc
)) {
871 mutex_exit(&sc
->sc_slock
);
875 for (scp
= sc
->sc_cache
; scp
< scl
; scp
++)
876 if (scp
->fc_cluster
== cl
) {
877 if (scp
->fc_type
== FSS_CACHE_VALID
) {
878 mutex_exit(&sc
->sc_slock
);
880 } else if (scp
->fc_type
== FSS_CACHE_BUSY
) {
881 cv_wait(&scp
->fc_state_cv
, &sc
->sc_slock
);
886 for (scp
= sc
->sc_cache
; scp
< scl
; scp
++)
887 if (scp
->fc_type
== FSS_CACHE_FREE
) {
888 scp
->fc_type
= FSS_CACHE_BUSY
;
889 scp
->fc_cluster
= cl
;
893 cv_wait(&sc
->sc_cache_cv
, &sc
->sc_slock
);
897 mutex_exit(&sc
->sc_slock
);
902 dblk
= btodb(FSS_CLTOB(sc
, cl
));
903 if (cl
== sc
->sc_clcount
-1) {
904 todo
= sc
->sc_clresid
;
905 memset((char *)scp
->fc_data
+ todo
, 0, FSS_CLSIZE(sc
) - todo
);
907 todo
= FSS_CLSIZE(sc
);
909 mbp
= getiobuf(NULL
, true);
910 mbp
->b_bufsize
= todo
;
911 mbp
->b_data
= scp
->fc_data
;
912 mbp
->b_resid
= mbp
->b_bcount
= todo
;
913 mbp
->b_flags
= B_READ
;
914 mbp
->b_cflags
= BC_BUSY
;
915 mbp
->b_dev
= sc
->sc_bdev
;
920 if (btodb(FSS_CLTOB(sc
, cl
)) == dblk
&& len
== todo
)
923 bp
= getiobuf(NULL
, true);
924 nestiobuf_setup(mbp
, bp
, offset
, len
);
933 error
= biowait(mbp
);
936 mutex_enter(&sc
->sc_slock
);
937 scp
->fc_type
= (error
? FSS_CACHE_FREE
: FSS_CACHE_VALID
);
938 cv_broadcast(&scp
->fc_state_cv
);
940 setbit(sc
->sc_copied
, scp
->fc_cluster
);
941 cv_signal(&sc
->sc_work_cv
);
943 mutex_exit(&sc
->sc_slock
);
949 * Read/write clusters from/to backing store.
950 * For persistent snapshots this must be called with cl == 0; off is then
951 * the offset into the snapshot.
954 fss_bs_io(struct fss_softc
*sc
, fss_io_type rw
,
955 u_int32_t cl
, off_t off
, int len
, void *data
)
959 off
+= FSS_CLTOB(sc
, cl
);
961 vn_lock(sc
->sc_bs_vp
, LK_EXCLUSIVE
|LK_RETRY
);
963 error
= vn_rdwr((rw
== FSS_READ
? UIO_READ
: UIO_WRITE
), sc
->sc_bs_vp
,
964 data
, len
, off
, UIO_SYSSPACE
, IO_UNIT
|IO_NODELOCKED
,
965 sc
->sc_bs_lwp
->l_cred
, NULL
, NULL
);
967 mutex_enter(&sc
->sc_bs_vp
->v_interlock
);
968 error
= VOP_PUTPAGES(sc
->sc_bs_vp
, trunc_page(off
),
969 round_page(off
+len
), PGO_CLEANIT
|PGO_SYNCIO
|PGO_FREE
);
972 VOP_UNLOCK(sc
->sc_bs_vp
, 0);
978 * Get a pointer to the indirect slot for this cluster.
981 fss_bs_indir(struct fss_softc
*sc
, u_int32_t cl
)
986 icl
= cl
/(FSS_CLSIZE(sc
)/sizeof(u_int32_t
));
987 ioff
= cl
%(FSS_CLSIZE(sc
)/sizeof(u_int32_t
));
989 if (sc
->sc_indir_cur
== icl
)
990 return &sc
->sc_indir_data
[ioff
];
992 if (sc
->sc_indir_dirty
) {
993 if (fss_bs_io(sc
, FSS_WRITE
, sc
->sc_indir_cur
, 0,
994 FSS_CLSIZE(sc
), (void *)sc
->sc_indir_data
) != 0)
996 setbit(sc
->sc_indir_valid
, sc
->sc_indir_cur
);
999 sc
->sc_indir_dirty
= 0;
1000 sc
->sc_indir_cur
= icl
;
1002 if (isset(sc
->sc_indir_valid
, sc
->sc_indir_cur
)) {
1003 if (fss_bs_io(sc
, FSS_READ
, sc
->sc_indir_cur
, 0,
1004 FSS_CLSIZE(sc
), (void *)sc
->sc_indir_data
) != 0)
1007 memset(sc
->sc_indir_data
, 0, FSS_CLSIZE(sc
));
1009 return &sc
->sc_indir_data
[ioff
];
1013 * The kernel thread (one for every active snapshot).
1015 * After wakeup it cleans the cache and runs the I/O requests.
1018 fss_bs_thread(void *arg
)
1020 bool thread_idle
, is_valid
;
1021 int error
, i
, todo
, len
, crotor
, is_read
;
1024 u_int32_t c
, cl
, ch
, *indirp
;
1025 struct buf
*bp
, *nbp
;
1026 struct fss_softc
*sc
;
1027 struct fss_cache
*scp
, *scl
;
1030 scl
= sc
->sc_cache
+sc
->sc_cache_size
;
1032 thread_idle
= false;
1034 mutex_enter(&sc
->sc_slock
);
1038 cv_wait(&sc
->sc_work_cv
, &sc
->sc_slock
);
1040 if ((sc
->sc_flags
& FSS_BS_THREAD
) == 0) {
1041 sc
->sc_bs_lwp
= NULL
;
1042 mutex_exit(&sc
->sc_slock
);
1047 * Process I/O requests (persistent)
1050 if (sc
->sc_flags
& FSS_PERSISTENT
) {
1051 if ((bp
= bufq_get(sc
->sc_bufq
)) == NULL
)
1053 is_valid
= FSS_ISVALID(sc
);
1054 is_read
= (bp
->b_flags
& B_READ
);
1055 thread_idle
= false;
1056 mutex_exit(&sc
->sc_slock
);
1059 disk_busy(sc
->sc_dkdev
);
1060 error
= fss_bs_io(sc
, FSS_READ
, 0,
1061 dbtob(bp
->b_blkno
), bp
->b_bcount
,
1063 disk_unbusy(sc
->sc_dkdev
,
1064 (error
? 0 : bp
->b_bcount
), is_read
);
1068 bp
->b_error
= error
;
1069 bp
->b_resid
= (error
? bp
->b_bcount
: 0);
1072 mutex_enter(&sc
->sc_slock
);
1079 for (i
= 0; i
< sc
->sc_cache_size
; i
++) {
1080 crotor
= (crotor
+ 1) % sc
->sc_cache_size
;
1081 scp
= sc
->sc_cache
+ crotor
;
1082 if (scp
->fc_type
!= FSS_CACHE_VALID
)
1084 mutex_exit(&sc
->sc_slock
);
1086 thread_idle
= false;
1087 indirp
= fss_bs_indir(sc
, scp
->fc_cluster
);
1088 if (indirp
!= NULL
) {
1089 error
= fss_bs_io(sc
, FSS_WRITE
, sc
->sc_clnext
,
1090 0, FSS_CLSIZE(sc
), scp
->fc_data
);
1094 mutex_enter(&sc
->sc_slock
);
1096 *indirp
= sc
->sc_clnext
++;
1097 sc
->sc_indir_dirty
= 1;
1099 fss_error(sc
, "write error on backing store");
1101 scp
->fc_type
= FSS_CACHE_FREE
;
1102 cv_signal(&sc
->sc_cache_cv
);
1107 * Process I/O requests
1109 if ((bp
= bufq_get(sc
->sc_bufq
)) == NULL
)
1111 is_valid
= FSS_ISVALID(sc
);
1112 is_read
= (bp
->b_flags
& B_READ
);
1113 thread_idle
= false;
1116 mutex_exit(&sc
->sc_slock
);
1118 bp
->b_error
= ENXIO
;
1119 bp
->b_resid
= bp
->b_bcount
;
1122 mutex_enter(&sc
->sc_slock
);
1126 disk_busy(sc
->sc_dkdev
);
1129 * First read from the snapshotted block device unless
1130 * this request is completely covered by backing store.
1133 cl
= FSS_BTOCL(sc
, dbtob(bp
->b_blkno
));
1134 off
= FSS_CLOFF(sc
, dbtob(bp
->b_blkno
));
1135 ch
= FSS_BTOCL(sc
, dbtob(bp
->b_blkno
)+bp
->b_bcount
-1);
1139 for (c
= cl
; c
<= ch
; c
++) {
1140 if (isset(sc
->sc_copied
, c
))
1142 mutex_exit(&sc
->sc_slock
);
1144 /* Not on backing store, read from device. */
1145 nbp
= getiobuf(NULL
, true);
1146 nbp
->b_flags
= B_READ
;
1147 nbp
->b_resid
= nbp
->b_bcount
= bp
->b_bcount
;
1148 nbp
->b_bufsize
= bp
->b_bcount
;
1149 nbp
->b_data
= bp
->b_data
;
1150 nbp
->b_blkno
= bp
->b_blkno
;
1152 nbp
->b_dev
= sc
->sc_bdev
;
1153 SET(nbp
->b_cflags
, BC_BUSY
); /* mark buffer busy */
1157 error
= biowait(nbp
);
1159 bp
->b_resid
= bp
->b_bcount
;
1160 bp
->b_error
= nbp
->b_error
;
1161 disk_unbusy(sc
->sc_dkdev
, 0, is_read
);
1166 mutex_enter(&sc
->sc_slock
);
1173 * Replace those parts that have been saved to backing store.
1177 todo
= bp
->b_bcount
;
1178 for (c
= cl
; c
<= ch
; c
++, off
= 0, todo
-= len
, addr
+= len
) {
1179 len
= FSS_CLSIZE(sc
)-off
;
1182 if (isclr(sc
->sc_copied
, c
))
1184 mutex_exit(&sc
->sc_slock
);
1186 indirp
= fss_bs_indir(sc
, c
);
1187 if (indirp
== NULL
|| *indirp
== 0) {
1189 * Not on backing store. Either in cache
1190 * or hole in the snapshotted block device.
1193 mutex_enter(&sc
->sc_slock
);
1194 for (scp
= sc
->sc_cache
; scp
< scl
; scp
++)
1195 if (scp
->fc_type
== FSS_CACHE_VALID
&&
1196 scp
->fc_cluster
== c
)
1199 memcpy(addr
, (char *)scp
->fc_data
+off
,
1202 memset(addr
, 0, len
);
1207 * Read from backing store.
1210 fss_bs_io(sc
, FSS_READ
, *indirp
, off
, len
, addr
);
1212 mutex_enter(&sc
->sc_slock
);
1214 bp
->b_resid
= bp
->b_bcount
;
1215 bp
->b_error
= error
;
1219 mutex_exit(&sc
->sc_slock
);
1221 disk_unbusy(sc
->sc_dkdev
, (error
? 0 : bp
->b_bcount
), is_read
);
1224 mutex_enter(&sc
->sc_slock
);
1230 #include <sys/module.h>
1232 MODULE(MODULE_CLASS_DRIVER
, fss
, NULL
);
1233 CFDRIVER_DECL(fss
, DV_DISK
, NULL
);
1236 fss_modcmd(modcmd_t cmd
, void *arg
)
1238 int bmajor
= -1, cmajor
= -1, error
= 0;
1241 case MODULE_CMD_INIT
:
1242 mutex_init(&fss_device_lock
, MUTEX_DEFAULT
, IPL_NONE
);
1243 error
= config_cfdriver_attach(&fss_cd
);
1245 mutex_destroy(&fss_device_lock
);
1248 error
= config_cfattach_attach(fss_cd
.cd_name
, &fss_ca
);
1250 config_cfdriver_detach(&fss_cd
);
1251 mutex_destroy(&fss_device_lock
);
1254 error
= devsw_attach(fss_cd
.cd_name
,
1255 &fss_bdevsw
, &bmajor
, &fss_cdevsw
, &cmajor
);
1257 config_cfattach_detach(fss_cd
.cd_name
, &fss_ca
);
1258 config_cfdriver_detach(&fss_cd
);
1259 mutex_destroy(&fss_device_lock
);
1264 case MODULE_CMD_FINI
:
1265 error
= config_cfattach_detach(fss_cd
.cd_name
, &fss_ca
);
1268 config_cfdriver_detach(&fss_cd
);
1269 devsw_detach(&fss_bdevsw
, &fss_cdevsw
);
1270 mutex_destroy(&fss_device_lock
);
1281 #endif /* _MODULE */