5 * The contents of this file are subject to the terms of the
6 * Common Development and Distribution License (the "License").
7 * You may not use this file except in compliance with the License.
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
26 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
27 /* All Rights Reserved */
30 * University Copyright- Copyright (c) 1982, 1986, 1988
31 * The Regents of the University of California
34 * University Acknowledgment- Portions of this document are derived from
35 * software developed by the University of California, Berkeley, and its
39 #include <sys/types.h>
40 #include <sys/thread.h>
41 #include <sys/t_lock.h>
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/bitmap.h>
46 #include <sys/cmn_err.h>
49 #include <sys/debug.h>
50 #include <sys/errno.h>
52 #include <sys/fcntl.h>
53 #include <sys/flock.h>
57 #include <sys/vmsystm.h>
60 #include <sys/sysmacros.h>
63 #include <sys/vfs_opreg.h>
64 #include <sys/vnode.h>
68 #include <sys/fs/decomp.h>
74 #include <vm/seg_vn.h>
75 #include <vm/seg_kmem.h>
76 #include <vm/seg_map.h>
78 #include <fs/fs_subr.h>
81 * dcfs - A filesystem for automatic decompressing of fiocompressed files
83 * This filesystem is a layered filesystem that sits on top of a normal
84 * persistent filesystem and provides automatic decompression of files
85 * that have been previously compressed and stored on the host file system.
86 * This is a pseudo filesystem in that it does not persist data, rather it
87 * intercepts file lookup requests on the host filesystem and provides
88 * transparent decompression of those files. Currently the only supported
89 * host filesystem is ufs.
91 * A file is compressed via a userland utility (currently cmd/boot/fiocompress)
92 * and marked by fiocompress as a compressed file via a flag in the on-disk
93 * inode (set via a ufs ioctl() - see ufs_vnops.c:ufs_ioctl(), _FIO_COMPRESSED).
94 * ufs_lookup checks for this flag and if set, passes control to decompvp
95 * a function defined in this (dcfs) filesystem. decompvp uncompresses the file
96 * and returns a dcfs vnode to the VFS layer.
98 * dcfs is layered on top of ufs and passes requests involving persistence
99 * to the underlying ufs filesystem. The compressed files currently cannot be
105 * Define data structures within this file.
/*
 * dcnode hash-table and decompress-buffer cache sizing.
 * DCHASH() maps a subordinate vnode pointer to a dctable[] bucket.
 */
#define	DCTABLESIZE	16

#if ((DCTABLESIZE & (DCTABLESIZE - 1)) == 0)
/* power-of-two table size: mask is cheaper than modulo */
#define	DCHASH(vp)	(((uintptr_t)(vp) >> DCSHFT) & (DCTABLESIZE - 1))
#else
/* Fix: was "% DTABLESIZEC", an undefined (typo'd) identifier */
#define	DCHASH(vp)	(((uintptr_t)(vp) >> DCSHFT) % DCTABLESIZE)
#endif

/* number of per-block-size dcbuf kmem caches (indexed by btop(blksize)) */
#define	DCCACHESIZE	4

/* NOTE: rounddown() assumes y is a power of two */
#define	rounddown(x, y)	((x) & ~((y) - 1))
/*
 * Global dcnode bookkeeping.
 * NOTE(review): extraction fused original line numbers into the text and
 * split declarations across lines; code below is left byte-identical.
 */
/* hash table of active dcnodes, indexed by DCHASH(subordinate vnode) */
122 struct dcnode
*dctable
[DCTABLESIZE
];
/* circular LRU list of idle dcnodes, and its current length */
124 struct dcnode
*dclru
;
125 static int dclru_len
;
/* protects dctable[] and the dclru list (asserted held by the helpers) */
127 kmutex_t dctable_lock
;
/* kmem cache for dcnodes; per-block-size caches for decompression buffers */
132 struct kmem_cache
*dcnode_cache
;
133 struct kmem_cache
*dcbuf_cache
[DCCACHESIZE
];
/* serializes lazy creation of dcbuf_cache entries (see decompvp) */
135 kmutex_t dccache_lock
;
/*
 * Forward declarations: module init, dcnode lifecycle (alloc/free/recycle),
 * hash-table maintenance (insert/delete/find) and LRU maintenance (add/sub).
 */
137 static int dcinit(int, char *);
139 static struct dcnode
*dcnode_alloc(void);
140 static void dcnode_free(struct dcnode
*);
141 static void dcnode_recycle(struct dcnode
*);
143 static void dcinsert(struct dcnode
*);
144 static void dcdelete(struct dcnode
*);
145 static struct dcnode
*dcfind(struct vnode
*);
146 static void dclru_add(struct dcnode
*);
147 static void dclru_sub(struct dcnode
*);
/*
 * Loadable-module wrapper: registers dcfs with the kernel module framework
 * as a filesystem module ("compressed filesystem").
 * NOTE(review): several linkage-structure lines are elided by extraction.
 */
151 * This is the loadable module wrapper.
153 #include <sys/modctl.h>
155 struct vfsops
*dc_vfsops
;
157 static vfsdef_t vfw
= {
166 * Module linkage information for the kernel.
168 extern struct mod_ops mod_fsops
;
170 static struct modlfs modlfs
= {
171 &mod_fsops
, "compressed filesystem", &vfw
174 static struct modlinkage modlinkage
= {
175 MODREV_1
, (void *)&modlfs
, NULL
/* _init(): install the module linkage */
181 return (mod_install(&modlinkage
));
/* _info(): report module information */
185 _info(struct modinfo
*modinfop
)
187 return (mod_info(&modlinkage
, modinfop
));
/*
 * Prototypes for the dcfs vnode operations registered in
 * dc_vnodeops_template below.  Most are thin pass-throughs to the
 * subordinate (host-filesystem) vnode; the paging ops do the real
 * decompression work.
 */
191 static int dc_open(struct vnode
**, int, struct cred
*, caller_context_t
*);
192 static int dc_close(struct vnode
*, int, int, offset_t
,
193 struct cred
*, caller_context_t
*);
194 static int dc_read(struct vnode
*, struct uio
*, int, struct cred
*,
195 struct caller_context
*);
196 static int dc_getattr(struct vnode
*, struct vattr
*, int,
197 struct cred
*, caller_context_t
*);
198 static int dc_setattr(struct vnode
*, struct vattr
*, int, struct cred
*,
199 struct caller_context
*);
200 static int dc_access(struct vnode
*, int, int,
201 struct cred
*, caller_context_t
*);
202 static int dc_fsync(struct vnode
*, int, struct cred
*, caller_context_t
*);
203 static void dc_inactive(struct vnode
*, struct cred
*, caller_context_t
*);
204 static int dc_fid(struct vnode
*, struct fid
*, caller_context_t
*);
205 static int dc_seek(struct vnode
*, offset_t
, offset_t
*, caller_context_t
*);
206 static int dc_frlock(struct vnode
*, int, struct flock64
*, int, offset_t
,
207 struct flk_callback
*, struct cred
*, caller_context_t
*);
208 static int dc_realvp(struct vnode
*, struct vnode
**, caller_context_t
*);
209 static int dc_getpage(struct vnode
*, offset_t
, size_t, uint_t
*,
210 struct page
**, size_t, struct seg
*, caddr_t
, enum seg_rw
,
211 struct cred
*, caller_context_t
*);
212 static int dc_putpage(struct vnode
*, offset_t
, size_t, int,
213 struct cred
*, caller_context_t
*);
214 static int dc_map(struct vnode
*, offset_t
, struct as
*, caddr_t
*, size_t,
215 uchar_t
, uchar_t
, uint_t
, struct cred
*, caller_context_t
*);
216 static int dc_addmap(struct vnode
*, offset_t
, struct as
*, caddr_t
, size_t,
217 uchar_t
, uchar_t
, uint_t
, struct cred
*, caller_context_t
*);
218 static int dc_delmap(struct vnode
*, offset_t
, struct as
*, caddr_t
, size_t,
219 uint_t
, uint_t
, uint_t
, struct cred
*, caller_context_t
*);
/*
 * Vnode operations vector for dcfs.  vn_make_ops() (see dcinit) turns this
 * name/function template into dc_vnodeops.
 */
221 struct vnodeops
*dc_vnodeops
;
223 const fs_operation_def_t dc_vnodeops_template
[] = {
224 VOPNAME_OPEN
, { .vop_open
= dc_open
},
225 VOPNAME_CLOSE
, { .vop_close
= dc_close
},
226 VOPNAME_READ
, { .vop_read
= dc_read
},
227 VOPNAME_GETATTR
, { .vop_getattr
= dc_getattr
},
228 VOPNAME_SETATTR
, { .vop_setattr
= dc_setattr
},
229 VOPNAME_ACCESS
, { .vop_access
= dc_access
},
230 VOPNAME_FSYNC
, { .vop_fsync
= dc_fsync
},
231 VOPNAME_INACTIVE
, { .vop_inactive
= dc_inactive
},
232 VOPNAME_FID
, { .vop_fid
= dc_fid
},
233 VOPNAME_SEEK
, { .vop_seek
= dc_seek
},
234 VOPNAME_FRLOCK
, { .vop_frlock
= dc_frlock
},
235 VOPNAME_REALVP
, { .vop_realvp
= dc_realvp
},
236 VOPNAME_GETPAGE
, { .vop_getpage
= dc_getpage
},
237 VOPNAME_PUTPAGE
, { .vop_putpage
= dc_putpage
},
238 VOPNAME_MAP
, { .vop_map
= dc_map
},
239 VOPNAME_ADDMAP
, { .vop_addmap
= dc_addmap
},
240 VOPNAME_DELMAP
, { .vop_delmap
= dc_delmap
},
/*
 * VOP_OPEN for dcfs.
 * NOTE(review): body not visible in this extraction — presumably a trivial
 * success return, but confirm against the full source.
 */
246 dc_open(struct vnode
**vpp
, int flag
, struct cred
*cr
, caller_context_t
*ctp
)
/*
 * VOP_CLOSE for dcfs: release any file locks and share reservations held
 * by the closing process on this vnode.
 */
253 dc_close(struct vnode
*vp
, int flag
, int count
, offset_t off
,
254 struct cred
*cr
, caller_context_t
*ctp
)
256 (void) cleanlocks(vp
, ttoproc(curthread
)->p_pid
, 0);
257 cleanshares(vp
, ttoproc(curthread
)->p_pid
);
/*
 * VOP_READ for dcfs: loop over the request with segmap; the page-in path
 * (dc_getpage/dc_getblock) performs the actual decompression.  Each pass
 * maps at most one rdsize-aligned window, copies out with uiomove(), and
 * releases the window (with flags when the block or file end is reached).
 * NOTE(review): loop braces and some declarations are elided by extraction.
 */
263 dc_read(struct vnode
*vp
, struct uio
*uiop
, int ioflag
, struct cred
*cr
,
264 struct caller_context
*ct
)
266 struct dcnode
*dp
= VTODC(vp
);
/* window size: at least MAXBSIZE, or the compression block size */
267 size_t rdsize
= MAX(MAXBSIZE
, dp
->dc_hdr
->ch_blksize
);
/* uncompressed file size from the compression header */
268 size_t fsize
= dp
->dc_hdr
->ch_fsize
;
272 * Loop through file with segmap, decompression will occur
281 * read to end of block or file
/* offset within the current window; rdsize is a power of two here */
283 mapon
= uiop
->uio_loffset
& (rdsize
- 1);
284 n
= MIN(rdsize
- mapon
, uiop
->uio_resid
);
285 n
= MIN(n
, fsize
- uiop
->uio_loffset
);
287 return (0); /* at EOF */
289 base
= segmap_getmapflt(segkmap
, vp
, uiop
->uio_loffset
, n
, 1,
291 error
= uiomove(base
+ mapon
, n
, UIO_READ
, uiop
);
/* finished a full block or hit EOF: release with flags */
295 if (n
+ mapon
== rdsize
|| uiop
->uio_loffset
== fsize
)
299 error
= segmap_release(segkmap
, base
, flags
);
301 (void) segmap_release(segkmap
, base
, 0);
302 } while (!error
&& uiop
->uio_resid
);
/*
 * VOP_GETATTR for dcfs: delegate to the subordinate vnode, then override
 * va_size with the uncompressed file size from the compression header so
 * callers see the logical (decompressed) length.
 */
308 dc_getattr(struct vnode
*vp
, struct vattr
*vap
, int flags
,
309 cred_t
*cred
, caller_context_t
*ctp
)
311 struct dcnode
*dp
= VTODC(vp
);
312 struct vnode
*subvp
= dp
->dc_subvp
;
315 error
= VOP_GETATTR(subvp
, vap
, flags
, cred
, ctp
);
317 /* substitute uncompressed size */
318 vap
->va_size
= dp
->dc_hdr
->ch_fsize
;
/*
 * VOP_SETATTR for dcfs: pure pass-through to the subordinate vnode.
 */
323 dc_setattr(struct vnode
*vp
, struct vattr
*vap
, int flags
, cred_t
*cred
,
324 caller_context_t
*ctp
)
326 struct dcnode
*dp
= VTODC(vp
);
327 struct vnode
*subvp
= dp
->dc_subvp
;
329 return (VOP_SETATTR(subvp
, vap
, flags
, cred
, ctp
));
/*
 * VOP_ACCESS for dcfs: pure pass-through to the subordinate vnode.
 */
333 dc_access(struct vnode
*vp
, int mode
, int flags
,
334 cred_t
*cred
, caller_context_t
*ctp
)
336 struct dcnode
*dp
= VTODC(vp
);
337 struct vnode
*subvp
= dp
->dc_subvp
;
339 return (VOP_ACCESS(subvp
, mode
, flags
, cred
, ctp
));
/*
 * VOP_FSYNC for dcfs.
 * NOTE(review): body not visible in this extraction — dcfs is read-only
 * (VFS_RDONLY, see dcinit), so presumably nothing to flush; confirm.
 */
344 dc_fsync(vnode_t
*vp
, int syncflag
, cred_t
*cred
, caller_context_t
*ctp
)
/*
 * VOP_INACTIVE for dcfs: drop the last reference to a dcnode.
 * Takes dctable_lock before v_lock; if another thread re-referenced the
 * vnode while we raced here, back out and let its vn_rele() finish the job.
 */
351 dc_inactive(struct vnode
*vp
, cred_t
*cr
, caller_context_t
*ctp
)
353 struct dcnode
*dp
= VTODC(vp
);
355 mutex_enter(&dctable_lock
);
356 mutex_enter(&vp
->v_lock
);
357 ASSERT(vp
->v_count
>= 1);
358 if (--vp
->v_count
!= 0) {
360 * Somebody accessed the dcnode before we got a chance to
361 * remove it. They will remove it when they do a vn_rele.
363 mutex_exit(&vp
->v_lock
);
364 mutex_exit(&dctable_lock
);
367 mutex_exit(&vp
->v_lock
);
/* count reached zero: free path runs here (elided by extraction) */
371 mutex_exit(&dctable_lock
);
/*
 * VOP_FID for dcfs: pure pass-through to the subordinate vnode.
 */
375 dc_fid(struct vnode
*vp
, struct fid
*fidp
, caller_context_t
*ctp
)
377 struct dcnode
*dp
= VTODC(vp
);
378 struct vnode
*subvp
= dp
->dc_subvp
;
380 return (VOP_FID(subvp
, fidp
, ctp
));
/*
 * VOP_SEEK for dcfs: pure pass-through to the subordinate vnode.
 */
384 dc_seek(struct vnode
*vp
, offset_t oof
, offset_t
*noffp
, caller_context_t
*ctp
)
386 struct dcnode
*dp
= VTODC(vp
);
387 struct vnode
*subvp
= dp
->dc_subvp
;
389 return (VOP_SEEK(subvp
, oof
, noffp
, ctp
));
/*
 * VOP_FRLOCK for dcfs: refuse record locks on a mapped file when mandatory
 * locking would apply (mode fetched from the subordinate vnode); otherwise
 * fall through to the generic fs_frlock().
 */
393 dc_frlock(struct vnode
*vp
, int cmd
, struct flock64
*bfp
, int flag
,
394 offset_t offset
, struct flk_callback
*flk_cbp
,
395 cred_t
*cr
, caller_context_t
*ctp
)
397 struct dcnode
*dp
= VTODC(vp
);
402 * If file is being mapped, disallow frlock.
404 vattr
.va_mask
= AT_MODE
;
405 if (error
= VOP_GETATTR(dp
->dc_subvp
, &vattr
, 0, cr
, ctp
))
407 if (dp
->dc_mapcnt
> 0 && MANDLOCK(vp
, vattr
.va_mode
))
410 return (fs_frlock(vp
, cmd
, bfp
, flag
, offset
, flk_cbp
, cr
, ctp
));
/*
 * Cache-miss path for dc_getblock(): create destination pages for one full
 * compression block, read the corresponding compressed extent from the
 * subordinate vnode (offsets taken from the header's block map), inflate
 * it with z_uncompress(), zero the tail of the last partial block, and
 * free the scratch buffer back to the per-block-size kmem cache.
 * NOTE(review): error-handling branches are elided by extraction.
 */
415 dc_getblock_miss(struct vnode
*vp
, offset_t off
, size_t len
, struct page
**ppp
,
416 struct seg
*seg
, caddr_t addr
, enum seg_rw rw
, struct cred
*cr
)
418 struct dcnode
*dp
= VTODC(vp
);
419 struct comphdr
*hdr
= dp
->dc_hdr
;
424 size_t rdoff
, rdsize
, dsize
;
/* caller must request exactly one compression block */
428 ASSERT(len
== hdr
->ch_blksize
);
430 * Get destination pages and make them addressable
432 pp
= page_create_va(vp
, off
, len
, PG_WAIT
, seg
, addr
);
433 bp
= pageio_setup(pp
, len
, vp
, B_READ
);
437 * read compressed data from subordinate vnode
439 saddr
= kmem_cache_alloc(dp
->dc_bufcache
, KM_SLEEP
);
/* compressed extent [rdoff, rdoff+rdsize) from consecutive blkmap entries */
441 rdoff
= hdr
->ch_blkmap
[cblkno
];
442 rdsize
= hdr
->ch_blkmap
[cblkno
+ 1] - rdoff
;
443 error
= vn_rdwr(UIO_READ
, dp
->dc_subvp
, saddr
, rdsize
, rdoff
,
444 UIO_SYSSPACE
, 0, 0, cr
, NULL
);
/* inflate into the page I/O buffer; dsize returns the inflated length */
452 zerr
= z_uncompress(bp
->b_un
.b_addr
, &dsize
, saddr
, dp
->dc_zmax
);
/* last block of the file may be short: zero the remainder */
461 xlen
= hdr
->ch_fsize
- off
;
463 bzero(bp
->b_un
.b_addr
+ xlen
, len
- xlen
);
466 } else if (dsize
!= len
)
473 kmem_cache_free(dp
->dc_bufcache
, saddr
);
/*
 * Look up one full compression block in the page cache.  If every page of
 * the block is already cached, return it; otherwise undo any page locks
 * taken so far and fall through to dc_getblock_miss() to read and
 * decompress the whole block (sub-block reads are never done).
 */
480 dc_getblock(struct vnode
*vp
, offset_t off
, size_t len
, struct page
**ppp
,
481 struct seg
*seg
, caddr_t addr
, enum seg_rw rw
, struct cred
*cr
)
483 struct page
*pp
, *plist
= NULL
;
488 * pvn_read_kluster() doesn't quite do what we want, since it
489 * thinks sub block reads are ok. Here we always decompress
/* probe every page of the block */
497 for (pgoff
= off
; pgoff
< off
+ len
; pgoff
+= PAGESIZE
) {
498 pp
= page_lookup(vp
, pgoff
, SE_EXCL
);
504 page_add(&plist
, pp
);
505 plist
= plist
->p_next
;
509 return (0); /* all pages in cache */
513 * Undo any locks so getblock_miss has an open field
518 return (dc_getblock_miss(vp
, off
, len
, ppp
, seg
, addr
, rw
, cr
));
/*
 * VOP_REALVP for dcfs: expose the subordinate vnode (or its own real
 * vnode, if it has one) as the "real" vnode behind this shadow.
 */
522 dc_realvp(vnode_t
*vp
, vnode_t
**vpp
, caller_context_t
*ct
)
526 vp
= VTODC(vp
)->dc_subvp
;
527 if (VOP_REALVP(vp
, &rvp
, ct
) == 0)
/*
 * VOP_GETPAGE for dcfs: page in [off, off+len) by whole compression
 * blocks.  Writes panic (compressed files are read-only); async requests
 * are acknowledged without doing work.  The range is rounded out to block
 * boundaries, each block fetched via dc_getblock(), and the resulting page
 * list handed to the caller with pvn_plist_init() (or torn down with
 * pvn_read_done(B_ERROR) on failure).
 */
535 dc_getpage(struct vnode
*vp
, offset_t off
, size_t len
, uint_t
*protp
,
536 struct page
*pl
[], size_t plsz
, struct seg
*seg
, caddr_t addr
,
537 enum seg_rw rw
, struct cred
*cr
, caller_context_t
*ctp
)
539 struct dcnode
*dp
= VTODC(vp
);
540 struct comphdr
*hdr
= dp
->dc_hdr
;
541 struct page
*pp
, *plist
= NULL
;
543 offset_t vp_boff
, vp_bend
;
544 size_t bsize
= hdr
->ch_blksize
;
547 /* does not support write */
549 panic("write attempt on compressed file");
556 * We don't support asynchronous operation at the moment, so
557 * just pretend we did it. If the pages are ever actually
558 * needed, they'll get brought in then.
564 * Calc block start and end offsets
566 vp_boff
= rounddown(off
, bsize
);
567 vp_bend
= roundup(off
+ len
, bsize
);
568 vp_baddr
= (caddr_t
)rounddown((uintptr_t)addr
, bsize
);
570 nblks
= (vp_bend
- vp_boff
) / bsize
;
572 error
= dc_getblock(vp
, vp_boff
, bsize
, &pp
, seg
, vp_baddr
,
574 page_list_concat(&plist
, &pp
);
579 pvn_plist_init(plist
, pl
, plsz
, off
, len
, rw
);
/* failure: release/destroy whatever was gathered */
581 pvn_read_done(plist
, B_ERROR
);
/*
 * Required only as a callback argument (e.g. to pvn_vplist_dirty());
 * dcfs never has legitimately dirty pages, so reaching here panics.
 */
586 * This function should never be called. We need to have it to pass
587 * it as an argument to other functions.
591 dc_putapage(struct vnode
*vp
, struct page
*pp
, u_offset_t
*offp
, size_t *lenp
,
592 int flags
, struct cred
*cr
)
594 /* should never happen */
595 cmn_err(CE_PANIC
, "dcfs: dc_putapage: dirty page");
/*
 * VOP_PUTPAGE for dcfs: used only to invalidate/free cached pages
 * (B_INVAL, B_FREE, B_DONTNEED) — the filesystem is read-only, so any
 * genuinely dirty page is a bug and is destroyed with a faked failed I/O.
 */
602 * The only flags we support are B_INVAL, B_FREE and B_DONTNEED.
605 * 1) the MC_SYNC command of memcntl(2) to support the MS_INVALIDATE flag.
606 * 2) the MC_ADVISE command of memcntl(2) with the MADV_DONTNEED advice
607 * which translates to an MC_SYNC with the MS_INVALIDATE flag.
609 * The B_FREE (as well as the B_DONTNEED) flag is set when the
610 * MADV_SEQUENTIAL advice has been used. VOP_PUTPAGE is invoked
611 * from SEGVN to release pages behind a pagefault.
615 dc_putpage(struct vnode
*vp
, offset_t off
, size_t len
, int flags
,
616 struct cred
*cr
, caller_context_t
*ctp
)
620 if (vp
->v_count
== 0) {
621 panic("dcfs_putpage: bad v_count");
625 if (vp
->v_flag
& VNOMAP
)
628 if (!vn_has_cached_data(vp
)) /* no pages mapped */
631 if (len
== 0) /* from 'off' to EOF */
632 error
= pvn_vplist_dirty(vp
, off
, dc_putapage
, flags
, cr
);
/* invalidate/free requires exclusive access to each page */
635 se_t se
= (flags
& (B_INVAL
| B_FREE
)) ? SE_EXCL
: SE_SHARED
;
637 for (io_off
= off
; io_off
< off
+ len
; io_off
+= PAGESIZE
) {
641 * We insist on getting the page only if we are
642 * about to invalidate, free or write it and
643 * the B_ASYNC flag is not set.
645 if ((flags
& B_INVAL
) || ((flags
& B_ASYNC
) == 0))
646 pp
= page_lookup(vp
, io_off
, se
);
648 pp
= page_lookup_nowait(vp
, io_off
, se
);
653 * Normally pvn_getdirty() should return 0, which
654 * implies that it has done the job for us.
655 * The shouldn't-happen scenario is when it returns 1.
656 * This means that the page has been modified and
657 * needs to be put back.
658 * Since we can't write to a dcfs compressed file,
659 * we fake a failed I/O and force pvn_write_done()
660 * to destroy the page.
662 if (pvn_getdirty(pp
, flags
) == 1) {
663 cmn_err(CE_NOTE
, "dc_putpage: dirty page");
664 pvn_write_done(pp
, flags
|
665 B_ERROR
| B_WRITE
| B_INVAL
| B_FORCE
);
/*
 * VOP_MAP for dcfs: validate the request (mappable vnode, non-negative
 * range, no mandatory locks on the subordinate file), choose or clear the
 * target address, then create a segvn mapping over this vnode.
 */
673 dc_map(struct vnode
*vp
, offset_t off
, struct as
*as
, caddr_t
*addrp
,
674 size_t len
, uchar_t prot
, uchar_t maxprot
, uint_t flags
,
675 struct cred
*cred
, caller_context_t
*ctp
)
678 struct segvn_crargs vn_a
;
681 if (vp
->v_flag
& VNOMAP
)
/* reject negative offsets and ranges that wrap */
684 if (off
< (offset_t
)0 || (offset_t
)(off
+ len
) < (offset_t
)0)
688 * If file is being locked, disallow mapping.
690 if (error
= VOP_GETATTR(VTODC(vp
)->dc_subvp
, &vattr
, 0, cred
, ctp
))
692 if (vn_has_mandatory_locks(vp
, vattr
.va_mode
))
697 if ((flags
& MAP_FIXED
) == 0) {
698 map_addr(addrp
, len
, off
, 1, flags
);
699 if (*addrp
== NULL
) {
705 * User specified address - blow away any previous mappings
707 (void) as_unmap(as
, *addrp
, len
);
/* fill in segvn creation arguments (some fields elided by extraction) */
712 vn_a
.type
= flags
& MAP_TYPE
;
714 vn_a
.maxprot
= maxprot
;
715 vn_a
.flags
= flags
& ~MAP_TYPE
;
719 vn_a
.lgrp_mem_policy_flags
= 0;
721 error
= as_map(as
, *addrp
, len
, segvn_create
, &vn_a
);
/*
 * VOP_ADDMAP for dcfs: account for new mapped pages by bumping dc_mapcnt
 * under dc_lock (dc_mapcnt gates mandatory-lock checks in dc_frlock).
 */
728 dc_addmap(struct vnode
*vp
, offset_t off
, struct as
*as
, caddr_t addr
,
729 size_t len
, uchar_t prot
, uchar_t maxprot
, uint_t flags
,
730 struct cred
*cr
, caller_context_t
*ctp
)
734 if (vp
->v_flag
& VNOMAP
)
738 mutex_enter(&dp
->dc_lock
);
739 dp
->dc_mapcnt
+= btopr(len
);
740 mutex_exit(&dp
->dc_lock
);
/*
 * VOP_DELMAP for dcfs: mirror of dc_addmap — decrement the mapped-page
 * count under dc_lock and assert it never goes negative.
 */
746 dc_delmap(struct vnode
*vp
, offset_t off
, struct as
*as
, caddr_t addr
,
747 size_t len
, uint_t prot
, uint_t maxprot
, uint_t flags
,
748 struct cred
*cr
, caller_context_t
*ctp
)
752 if (vp
->v_flag
& VNOMAP
)
756 mutex_enter(&dp
->dc_lock
);
757 dp
->dc_mapcnt
-= btopr(len
);
758 ASSERT(dp
->dc_mapcnt
>= 0);
759 mutex_exit(&dp
->dc_lock
);
/*
 * kmem cache constructor for dcnodes: allocate and initialize the embedded
 * vnode (no-swap flag, dcfs vfs, dcfs vnode ops), initialize dc_lock, and
 * clear the LRU linkage.
 */
764 * Constructor/destructor routines for dcnodes
768 dcnode_constructor(void *buf
, void *cdrarg
, int kmflags
)
770 struct dcnode
*dp
= buf
;
773 vp
= dp
->dc_vp
= vn_alloc(kmflags
);
779 vp
->v_flag
= VNOSWAP
;
780 vp
->v_vfsp
= &dc_vfs
;
781 vn_setops(vp
, dc_vnodeops
);
784 mutex_init(&dp
->dc_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
786 dp
->dc_lrunext
= dp
->dc_lruprev
= NULL
;
/*
 * kmem cache destructor for dcnodes: the header and subordinate vnode must
 * already have been released (dcnode_recycle), then tear down dc_lock.
 */
794 dcnode_destructor(void *buf
, void *cdrarg
)
796 struct dcnode
*dp
= buf
;
797 struct vnode
*vp
= DCTOV(dp
);
799 mutex_destroy(&dp
->dc_lock
);
801 VERIFY(dp
->dc_hdr
== NULL
);
802 VERIFY(dp
->dc_subvp
== NULL
);
/*
 * Allocate a dcnode: while the idle LRU list is short, take a fresh one
 * from the kmem cache; otherwise (path elided by extraction) recycle the
 * node at the head of the LRU.
 */
807 static struct dcnode
*
813 * If the free list is above DCLRUSIZE
816 mutex_enter(&dctable_lock
);
817 if (dclru_len
< DCLRUSIZE
) {
818 mutex_exit(&dctable_lock
);
819 dp
= kmem_cache_alloc(dcnode_cache
, KM_SLEEP
);
821 ASSERT(dclru
!= NULL
);
825 mutex_exit(&dctable_lock
);
/*
 * Free a dcnode (dctable_lock held): if its vnode holds no cached pages,
 * return it straight to the kmem cache; otherwise park it on the LRU and,
 * if the LRU has grown past DCLRUSIZE, evict from the head.
 */
832 dcnode_free(struct dcnode
*dp
)
834 struct vnode
*vp
= DCTOV(dp
);
836 ASSERT(MUTEX_HELD(&dctable_lock
));
839 * If no cached pages, no need to put it on lru
841 if (!vn_has_cached_data(vp
)) {
844 kmem_cache_free(dcnode_cache
, dp
);
849 * Add to lru, if it's over the limit, free from head
852 if (dclru_len
> DCLRUSIZE
) {
857 kmem_cache_free(dcnode_cache
, dp
);
/*
 * Strip a dcnode back to constructed state for reuse: release the
 * subordinate vnode, invalidate any cached pages, free the compression
 * header, zero the bookkeeping fields, and re-initialize the vnode flags.
 */
862 dcnode_recycle(struct dcnode
*dp
)
868 VN_RELE(dp
->dc_subvp
);
870 (void) pvn_vplist_dirty(vp
, 0, dc_putapage
, B_INVAL
, NULL
);
871 kmem_free(dp
->dc_hdr
, dp
->dc_hdrsize
);
873 dp
->dc_hdrsize
= dp
->dc_zmax
= 0;
874 dp
->dc_bufcache
= NULL
;
878 vp
->v_flag
= VNOSWAP
;
879 vp
->v_vfsp
= &dc_vfs
;
/*
 * Filesystem-type init: register the vfs and vnode operation templates,
 * set up the read-only dcfs vfs (device number from getudev()), and create
 * the global locks and the dcnode kmem cache.
 */
883 dcinit(int fstype
, char *name
)
885 static const fs_operation_def_t dc_vfsops_template
[] = {
891 error
= vfs_setfsops(fstype
, dc_vfsops_template
, &dc_vfsops
);
893 cmn_err(CE_WARN
, "dcinit: bad vfs ops template");
896 VFS_INIT(&dc_vfs
, dc_vfsops
, NULL
);
/* dcfs never persists data itself: mark the vfs read-only */
897 dc_vfs
.vfs_flag
= VFS_RDONLY
;
898 dc_vfs
.vfs_fstype
= fstype
;
899 if ((dev
= getudev()) == (major_t
)-1)
901 dcdev
= makedevice(dev
, 0);
902 dc_vfs
.vfs_dev
= dcdev
;
904 error
= vn_make_ops(name
, dc_vnodeops_template
, &dc_vnodeops
);
/* vnode-ops failure: undo the vfs-ops registration */
906 (void) vfs_freevfsops_by_type(fstype
);
907 cmn_err(CE_WARN
, "dcinit: bad vnode ops template");
911 mutex_init(&dctable_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
912 mutex_init(&dccache_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
913 dcnode_cache
= kmem_cache_create("dcnode_cache", sizeof (struct dcnode
),
914 0, dcnode_constructor
, dcnode_destructor
, NULL
, NULL
, NULL
, 0);
/*
 * Entry point from the host filesystem (ufs_lookup): return a dcfs shadow
 * vnode whose subordinate is vp.  Reuses an existing dcnode from dctable
 * when present; otherwise validates the on-disk compression header, reads
 * the full header (including block map), appends a sentinel blkmap entry,
 * allocates and fills a new dcnode, lazily creates the per-block-size
 * decompression-buffer kmem cache, and re-checks the table for a racing
 * creation before inserting.
 */
920 * Return shadow vnode with the given vp as its subordinate
923 decompvp(struct vnode
*vp
, cred_t
*cred
, caller_context_t
*ctp
)
925 struct dcnode
*dp
, *ndp
;
926 struct comphdr thdr
, *hdr
;
927 struct kmem_cache
**cpp
;
929 size_t hdrsize
, bsize
;
933 * See if we have an existing shadow
934 * If none, we have to manufacture one
936 mutex_enter(&dctable_lock
);
938 mutex_exit(&dctable_lock
);
943 * Make sure it's a valid compressed file
/* read the fixed part of the header into the stack copy first */
946 error
= vn_rdwr(UIO_READ
, vp
, (caddr_t
)hdr
, sizeof (struct comphdr
), 0,
947 UIO_SYSSPACE
, 0, 0, cred
, NULL
);
/* reject anything but a sane zlib-compressed header */
948 if (error
|| hdr
->ch_magic
!= CH_MAGIC_ZLIB
||
949 hdr
->ch_version
!= CH_VERSION
|| hdr
->ch_algorithm
!= CH_ALG_ZLIB
||
950 hdr
->ch_fsize
== 0 || hdr
->ch_blksize
< PAGESIZE
||
951 hdr
->ch_blksize
> ptob(DCCACHESIZE
) || !ISP2(hdr
->ch_blksize
))
954 /* get underlying file size */
955 if (VOP_GETATTR(vp
, &vattr
, 0, cred
, ctp
) != 0)
959 * Re-read entire header
961 hdrsize
= hdr
->ch_blkmap
[0] + sizeof (uint64_t);
962 hdr
= kmem_alloc(hdrsize
, KM_SLEEP
);
963 error
= vn_rdwr(UIO_READ
, vp
, (caddr_t
)hdr
, hdrsize
, 0, UIO_SYSSPACE
,
966 kmem_free(hdr
, hdrsize
);
971 * add extra blkmap entry to make dc_getblock()'s
/* sentinel: end offset of the last block is the compressed file size */
974 bsize
= hdr
->ch_blksize
;
975 hdr
->ch_blkmap
[((hdr
->ch_fsize
-1) / bsize
) + 1] = vattr
.va_size
;
977 ndp
= dcnode_alloc();
981 ndp
->dc_hdrsize
= hdrsize
;
984 * Allocate kmem cache if none there already
986 ndp
->dc_zmax
= ZMAXBUF(bsize
);
987 cpp
= &dcbuf_cache
[btop(bsize
)];
988 mutex_enter(&dccache_lock
);
990 *cpp
= kmem_cache_create("dcbuf_cache", ndp
->dc_zmax
, 0, NULL
,
991 NULL
, NULL
, NULL
, NULL
, 0);
992 mutex_exit(&dccache_lock
);
993 ndp
->dc_bufcache
= *cpp
;
996 * Recheck table in case someone else created shadow
997 * while we were blocked above.
999 mutex_enter(&dctable_lock
);
/* lost the race: discard our node and use the winner's */
1002 mutex_exit(&dctable_lock
);
1003 dcnode_recycle(ndp
);
1004 kmem_cache_free(dcnode_cache
, ndp
);
1008 mutex_exit(&dctable_lock
);
1010 return (DCTOV(ndp
));
/*
 * Insert a dcnode at the head of its hash chain (dctable_lock held).
 */
1015 * dcnode lookup table
1016 * These routines maintain a table of dcnodes hashed by their
1017 * subordinate vnode so that they can be found if they already
1018 * exist in the vnode cache
1022 * Put a dcnode in the table.
1025 dcinsert(struct dcnode
*newdp
)
1027 int idx
= DCHASH(newdp
->dc_subvp
);
1029 ASSERT(MUTEX_HELD(&dctable_lock
));
1030 newdp
->dc_hash
= dctable
[idx
];
1031 dctable
[idx
] = newdp
;
/*
 * Unlink a dcnode from its hash chain (dctable_lock held): head case first,
 * otherwise walk the chain with a trailing pointer and splice it out.
 */
1035 * Remove a dcnode from the hash table.
1038 dcdelete(struct dcnode
*deldp
)
1040 int idx
= DCHASH(deldp
->dc_subvp
);
1041 struct dcnode
*dp
, *prevdp
;
1043 ASSERT(MUTEX_HELD(&dctable_lock
));
/* deldp is at the head of the bucket */
1046 dctable
[idx
] = dp
->dc_hash
;
1048 for (prevdp
= dp
, dp
= dp
->dc_hash
; dp
!= NULL
;
1049 prevdp
= dp
, dp
= dp
->dc_hash
) {
1051 prevdp
->dc_hash
= dp
->dc_hash
;
/*
 * Look up the dcnode shadowing subordinate vnode vp in dctable
 * (dctable_lock held); linear scan of the hash chain.
 */
1060 * Find a shadow vnode in the dctable hash list.
1062 static struct dcnode
*
1063 dcfind(struct vnode
*vp
)
1067 ASSERT(MUTEX_HELD(&dctable_lock
));
1068 for (dp
= dctable
[DCHASH(vp
)]; dp
!= NULL
; dp
= dp
->dc_hash
)
1069 if (dp
->dc_subvp
== vp
) {
/*
 * Fragment of the LRU-length sanity checker (dclru_count, asserted against
 * dclru_len below): walks the circular LRU list once.
 * NOTE(review): surrounding function lines are elided by extraction.
 */
1087 for (dp
= dclru
; dp
->dc_lrunext
!= dclru
; dp
= dp
->dc_lrunext
)
/*
 * Append a dcnode to the circular doubly-linked LRU (dctable_lock held):
 * self-link when the list is empty, otherwise splice before the head.
 * The length counter is cross-checked against an actual walk.
 */
1094 dclru_add(struct dcnode
*dp
)
1097 * Add to dclru as double-link chain
1099 ASSERT(MUTEX_HELD(&dctable_lock
));
1100 if (dclru
== NULL
) {
1102 dp
->dc_lruprev
= dp
->dc_lrunext
= dp
;
1104 struct dcnode
*last
= dclru
->dc_lruprev
;
1106 dclru
->dc_lruprev
= dp
;
1107 last
->dc_lrunext
= dp
;
1108 dp
->dc_lruprev
= last
;
1109 dp
->dc_lrunext
= dclru
;
1112 ASSERT(dclru_len
== dclru_count());
/*
 * Remove a dcnode from the circular LRU (dctable_lock held): unlink its
 * neighbors, advance the head if needed (NULL when dp was the only entry),
 * clear dp's linkage, and cross-check the length counter.
 */
1116 dclru_sub(struct dcnode
*dp
)
1118 ASSERT(MUTEX_HELD(&dctable_lock
));
1119 dp
->dc_lrunext
->dc_lruprev
= dp
->dc_lruprev
;
1120 dp
->dc_lruprev
->dc_lrunext
= dp
->dc_lrunext
;
1122 dclru
= dp
->dc_lrunext
== dp
? NULL
: dp
->dc_lrunext
;
1123 dp
->dc_lrunext
= dp
->dc_lruprev
= NULL
;
1125 ASSERT(dclru_len
== dclru_count());