/*
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 */

/*
 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2017 by Delphix. All rights reserved.
 */

/*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/

/*
 * University Copyright- Copyright (c) 1982, 1986, 1988
 * The Regents of the University of California
 * All Rights Reserved
 *
 * University Acknowledgment- Portions of this document are derived from
 * software developed by the University of California, Berkeley, and its
 * contributors.
 */
#include <sys/types.h>
#include <sys/thread.h>
#include <sys/t_lock.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bitmap.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/errno.h>
#include <sys/fcntl.h>
#include <sys/flock.h>
#include <sys/vmsystm.h>
#include <sys/sysmacros.h>
#include <sys/vnode.h>
#include <sys/fs/decomp.h>

#include <vm/seg_vn.h>
#include <vm/seg_kmem.h>
#include <vm/seg_map.h>

#include <sys/fs_subr.h>
/*
 * dcfs - A filesystem for automatic decompression of fiocompressed files
 *
 * This filesystem is a layered filesystem that sits on top of a normal
 * persistent filesystem and provides automatic decompression of files
 * that have been previously compressed and stored on the host filesystem.
 * This is a pseudo filesystem in that it does not persist data; rather, it
 * intercepts file lookup requests on the host filesystem and provides
 * transparent decompression of those files.  Currently the only supported
 * host filesystem is ufs.
 *
 * A file is compressed via a userland utility (currently cmd/boot/fiocompress)
 * and marked by fiocompress as a compressed file via a flag in the on-disk
 * inode (set via the _FIO_COMPRESSED ufs ioctl() - see ufs_ioctl() in
 * ufs_vnops.c).  ufs_lookup checks for this flag and, if it is set, passes
 * control to decompvp, a function defined in this (dcfs) filesystem.
 * decompvp uncompresses the file and returns a dcfs vnode to the VFS layer.
 *
 * dcfs is layered on top of ufs and passes requests involving persistence
 * to the underlying ufs filesystem.  The compressed files currently cannot
 * be written to.
 */
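
/*
 * For orientation, the compressed on-disk format consumed here begins with
 * a struct comphdr followed by the compressed blocks.  The authoritative
 * definition lives in sys/fs/decomp.h; the sketch below is inferred from
 * the ch_ field accesses in this file and may differ in detail:
 *
 *	struct comphdr {
 *		uint64_t ch_magic;	(CH_MAGIC_ZLIB)
 *		uint64_t ch_version;	(CH_VERSION)
 *		uint64_t ch_algorithm;	(CH_ALG_ZLIB)
 *		uint64_t ch_fsize;	uncompressed file size
 *		uint64_t ch_blksize;	uncompressed block size, a power of 2
 *		uint64_t ch_blkmap[];	file offsets of compressed blocks
 *	};
 *
 * ch_blkmap[n] gives the offset of compressed block n, so block n occupies
 * bytes [ch_blkmap[n], ch_blkmap[n + 1]) of the compressed file; that is
 * exactly how dc_getblock_miss() computes rdoff and rdsize below.
 */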
/*
 * Define data structures within this file.
 */
#define	DCTABLESIZE	16

#if ((DCTABLESIZE & (DCTABLESIZE - 1)) == 0)
#define	DCHASH(vp)	(((uintptr_t)(vp) >> DCSHFT) & (DCTABLESIZE - 1))
#else
#define	DCHASH(vp)	(((uintptr_t)(vp) >> DCSHFT) % DCTABLESIZE)
#endif

#define	DCCACHESIZE	4

#define	rounddown(x, y)	((x) & ~((y) - 1))
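
/*
 * Note on the two DCHASH variants: when DCTABLESIZE is a power of two
 * (the #if above), masking with (DCTABLESIZE - 1) keeps the low-order
 * bits and is equivalent to the modulo in the fallback branch, e.g.
 * 0x1234 & 15 == 0x1234 % 16 == 4.  The right shift by DCSHFT first
 * discards the low pointer bits, which carry little entropy since
 * vnode addresses are aligned allocations.
 */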
struct dcnode	*dctable[DCTABLESIZE];
struct dcnode	*dclru;
static int	dclru_len;

kmutex_t	dctable_lock;

struct kmem_cache	*dcnode_cache;
struct kmem_cache	*dcbuf_cache[DCCACHESIZE];

kmutex_t	dccache_lock;
static int dcinit(int, char *);

static struct dcnode	*dcnode_alloc(void);
static void		dcnode_free(struct dcnode *);
static void		dcnode_recycle(struct dcnode *);

static void		dcinsert(struct dcnode *);
static void		dcdelete(struct dcnode *);
static struct dcnode	*dcfind(struct vnode *);
static void		dclru_add(struct dcnode *);
static void		dclru_sub(struct dcnode *);
/*
 * This is the loadable module wrapper.
 */
#include <sys/modctl.h>

/* yes, we want all defaults */
static const struct vfsops dc_vfsops;

static vfsdef_t vfw = {
	VFSDEF_VERSION,
	"dcfs",
	dcinit,
	0,
	NULL
};

/*
 * Module linkage information for the kernel.
 */
extern struct mod_ops mod_fsops;

static struct modlfs modlfs = {
	&mod_fsops, "compressed filesystem", &vfw
};

static struct modlinkage modlinkage = {
	MODREV_1, (void *)&modlfs, NULL
};

int
_init()
{
	return (mod_install(&modlinkage));
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}
static int dc_open(struct vnode **, int, struct cred *, caller_context_t *);
static int dc_close(struct vnode *, int, int, offset_t,
    struct cred *, caller_context_t *);
static int dc_read(struct vnode *, struct uio *, int, struct cred *,
    struct caller_context *);
static int dc_getattr(struct vnode *, struct vattr *, int,
    struct cred *, caller_context_t *);
static int dc_setattr(struct vnode *, struct vattr *, int, struct cred *,
    struct caller_context *);
static int dc_access(struct vnode *, int, int,
    struct cred *, caller_context_t *);
static int dc_fsync(struct vnode *, int, struct cred *, caller_context_t *);
static void dc_inactive(struct vnode *, struct cred *, caller_context_t *);
static int dc_fid(struct vnode *, struct fid *, caller_context_t *);
static int dc_seek(struct vnode *, offset_t, offset_t *, caller_context_t *);
static int dc_frlock(struct vnode *, int, struct flock64 *, int, offset_t,
    struct flk_callback *, struct cred *, caller_context_t *);
static int dc_realvp(struct vnode *, struct vnode **, caller_context_t *);
static int dc_getpage(struct vnode *, offset_t, size_t, uint_t *,
    struct page **, size_t, struct seg *, caddr_t, enum seg_rw,
    struct cred *, caller_context_t *);
static int dc_putpage(struct vnode *, offset_t, size_t, int,
    struct cred *, caller_context_t *);
static int dc_map(struct vnode *, offset_t, struct as *, caddr_t *, size_t,
    uchar_t, uchar_t, uint_t, struct cred *, caller_context_t *);
static int dc_addmap(struct vnode *, offset_t, struct as *, caddr_t, size_t,
    uchar_t, uchar_t, uint_t, struct cred *, caller_context_t *);
static int dc_delmap(struct vnode *, offset_t, struct as *, caddr_t, size_t,
    uint_t, uint_t, uint_t, struct cred *, caller_context_t *);
static const struct vnodeops dc_vnodeops = {
	.vnop_name = "dcfs",
	.vop_open = dc_open,
	.vop_close = dc_close,
	.vop_read = dc_read,
	.vop_getattr = dc_getattr,
	.vop_setattr = dc_setattr,
	.vop_access = dc_access,
	.vop_fsync = dc_fsync,
	.vop_inactive = dc_inactive,
	.vop_fid = dc_fid,
	.vop_seek = dc_seek,
	.vop_frlock = dc_frlock,
	.vop_realvp = dc_realvp,
	.vop_getpage = dc_getpage,
	.vop_putpage = dc_putpage,
	.vop_map = dc_map,
	.vop_addmap = dc_addmap,
	.vop_delmap = dc_delmap,
};
/*ARGSUSED*/
static int
dc_open(struct vnode **vpp, int flag, struct cred *cr, caller_context_t *ctp)
{
	return (0);
}

/*ARGSUSED*/
static int
dc_close(struct vnode *vp, int flag, int count, offset_t off,
    struct cred *cr, caller_context_t *ctp)
{
	(void) cleanlocks(vp, ttoproc(curthread)->p_pid, 0);
	cleanshares(vp, ttoproc(curthread)->p_pid);
	return (0);
}
/*ARGSUSED*/
static int
dc_read(struct vnode *vp, struct uio *uiop, int ioflag, struct cred *cr,
    struct caller_context *ct)
{
	struct dcnode *dp = VTODC(vp);
	size_t rdsize = MAX(MAXBSIZE, dp->dc_hdr->ch_blksize);
	size_t fsize = dp->dc_hdr->ch_fsize;
	int error;

	/*
	 * Loop through file with segmap, decompression will occur
	 * in the getpage path when the pages are faulted in
	 */
	do {
		caddr_t base;
		size_t n;
		offset_t mapon;

		/*
		 * read to end of block or file
		 */
		mapon = uiop->uio_loffset & (rdsize - 1);
		n = MIN(rdsize - mapon, uiop->uio_resid);
		n = MIN(n, fsize - uiop->uio_loffset);
		if (n == 0)
			return (0);	/* at EOF */

		base = segmap_getmapflt(segkmap, vp, uiop->uio_loffset, n, 1,
		    S_READ);
		error = uiomove(base + mapon, n, UIO_READ, uiop);
		if (!error) {
			uint_t flags;

			if (n + mapon == rdsize || uiop->uio_loffset == fsize)
				flags = SM_DONTNEED;
			else
				flags = 0;
			error = segmap_release(segkmap, base, flags);
		} else
			(void) segmap_release(segkmap, base, 0);
	} while (!error && uiop->uio_resid);

	return (error);
}
static int
dc_getattr(struct vnode *vp, struct vattr *vap, int flags,
    cred_t *cred, caller_context_t *ctp)
{
	struct dcnode *dp = VTODC(vp);
	struct vnode *subvp = dp->dc_subvp;
	int error;

	error = fop_getattr(subvp, vap, flags, cred, ctp);

	/* substitute uncompressed size */
	vap->va_size = dp->dc_hdr->ch_fsize;
	return (error);
}
static int
dc_setattr(struct vnode *vp, struct vattr *vap, int flags, cred_t *cred,
    caller_context_t *ctp)
{
	struct dcnode *dp = VTODC(vp);
	struct vnode *subvp = dp->dc_subvp;

	return (fop_setattr(subvp, vap, flags, cred, ctp));
}
static int
dc_access(struct vnode *vp, int mode, int flags,
    cred_t *cred, caller_context_t *ctp)
{
	struct dcnode *dp = VTODC(vp);
	struct vnode *subvp = dp->dc_subvp;

	return (fop_access(subvp, mode, flags, cred, ctp));
}
/*ARGSUSED*/
static int
dc_fsync(vnode_t *vp, int syncflag, cred_t *cred, caller_context_t *ctp)
{
	/* dcfs is read-only, so there is never anything to sync */
	return (0);
}
/*ARGSUSED*/
static void
dc_inactive(struct vnode *vp, cred_t *cr, caller_context_t *ctp)
{
	struct dcnode *dp = VTODC(vp);

	mutex_enter(&dctable_lock);
	mutex_enter(&vp->v_lock);
	ASSERT(vp->v_count >= 1);
	VN_RELE_LOCKED(vp);
	if (vp->v_count != 0) {
		/*
		 * Somebody accessed the dcnode before we got a chance to
		 * remove it.  They will remove it when they do a vn_rele.
		 */
		mutex_exit(&vp->v_lock);
		mutex_exit(&dctable_lock);
		return;
	}
	mutex_exit(&vp->v_lock);

	dcnode_free(dp);

	mutex_exit(&dctable_lock);
}
static int
dc_fid(struct vnode *vp, struct fid *fidp, caller_context_t *ctp)
{
	struct dcnode *dp = VTODC(vp);
	struct vnode *subvp = dp->dc_subvp;

	return (fop_fid(subvp, fidp, ctp));
}
static int
dc_seek(struct vnode *vp, offset_t oof, offset_t *noffp, caller_context_t *ctp)
{
	struct dcnode *dp = VTODC(vp);
	struct vnode *subvp = dp->dc_subvp;

	return (fop_seek(subvp, oof, noffp, ctp));
}
static int
dc_frlock(struct vnode *vp, int cmd, struct flock64 *bfp, int flag,
    offset_t offset, struct flk_callback *flk_cbp,
    cred_t *cr, caller_context_t *ctp)
{
	struct dcnode *dp = VTODC(vp);
	struct vattr vattr;
	int error;

	/*
	 * If file is being mapped, disallow frlock.
	 */
	vattr.va_mask = VATTR_MODE;
	if (error = fop_getattr(dp->dc_subvp, &vattr, 0, cr, ctp))
		return (error);
	if (dp->dc_mapcnt > 0 && MANDLOCK(vp, vattr.va_mode))
		return (EAGAIN);

	return (fs_frlock(vp, cmd, bfp, flag, offset, flk_cbp, cr, ctp));
}
static int
dc_getblock_miss(struct vnode *vp, offset_t off, size_t len, struct page **ppp,
    struct seg *seg, caddr_t addr, enum seg_rw rw, struct cred *cr)
{
	struct dcnode *dp = VTODC(vp);
	struct comphdr *hdr = dp->dc_hdr;
	struct page *pp;
	struct buf *bp;
	caddr_t saddr;
	off_t cblkno;
	size_t rdoff, rdsize, dsize;
	long xlen;
	int error, zerr;

	ASSERT(len == hdr->ch_blksize);
	/*
	 * Get destination pages and make them addressable
	 */
	pp = page_create_va(&vp->v_object, off, len, PG_WAIT, seg, addr);
	bp = pageio_setup(pp, len, vp, B_READ);
	bp_mapin(bp);

	/*
	 * read compressed data from subordinate vnode
	 */
	saddr = kmem_cache_alloc(dp->dc_bufcache, KM_SLEEP);
	cblkno = off / len;
	rdoff = hdr->ch_blkmap[cblkno];
	rdsize = hdr->ch_blkmap[cblkno + 1] - rdoff;
	error = vn_rdwr(UIO_READ, dp->dc_subvp, saddr, rdsize, rdoff,
	    UIO_SYSSPACE, 0, 0, cr, NULL);
	if (error)
		goto cleanup;

	/*
	 * Uncompress the block into the destination pages
	 */
	dsize = len;
	zerr = z_uncompress(bp->b_un.b_addr, &dsize, saddr, dp->dc_zmax);
	if (zerr != Z_OK) {
		error = EIO;
		goto cleanup;
	}

	/*
	 * The last block of the file may be partial; zero its tail and
	 * verify the decompressed length
	 */
	if (off + len > hdr->ch_fsize) {
		xlen = hdr->ch_fsize - off;
		bzero(bp->b_un.b_addr + xlen, len - xlen);
		if (dsize != xlen)
			error = EIO;
	} else if (dsize != len)
		error = EIO;

cleanup:
	kmem_cache_free(dp->dc_bufcache, saddr);
	bp_mapout(bp);
	pageio_done(bp);
	if (error)
		pvn_read_done(pp, B_ERROR);
	else
		*ppp = pp;
	return (error);
}
static int
dc_getblock(struct vnode *vp, offset_t off, size_t len, struct page **ppp,
    struct seg *seg, caddr_t addr, enum seg_rw rw, struct cred *cr)
{
	struct page *pp, *plist = NULL;
	offset_t pgoff;
	int rdblk;

	/*
	 * pvn_read_kluster() doesn't quite do what we want, since it
	 * thinks sub block reads are ok.  Here we always decompress
	 * a full block.
	 */

	/*
	 * Check page cache
	 */
	rdblk = 0;
	for (pgoff = off; pgoff < off + len; pgoff += PAGESIZE) {
		pp = page_lookup(&vp->v_object, pgoff, SE_EXCL);
		if (pp == NULL) {
			rdblk = 1;
			break;
		}
		page_add(&plist, pp);
		plist = plist->p_next;
	}
	if (!rdblk) {
		*ppp = plist;
		return (0);	/* all pages in cache */
	}

	/*
	 * Undo any locks so getblock_miss has an open field
	 */
	while (plist != NULL) {
		pp = plist;
		page_sub(&plist, pp);
		page_unlock(pp);
	}

	return (dc_getblock_miss(vp, off, len, ppp, seg, addr, rw, cr));
}
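
/*
 * Note on the all-or-nothing policy above: a compressed block can only be
 * decompressed in its entirety, so if any page of the block is absent from
 * the page cache, every page lock already taken is dropped and
 * dc_getblock_miss() re-creates and fills the whole block.
 */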
/*ARGSUSED*/
static int
dc_realvp(vnode_t *vp, vnode_t **vpp, caller_context_t *ct)
{
	struct vnode *rvp;

	vp = VTODC(vp)->dc_subvp;
	if (fop_realvp(vp, &rvp, ct) == 0)
		vp = rvp;
	*vpp = vp;
	return (0);
}
/*ARGSUSED*/
static int
dc_getpage(struct vnode *vp, offset_t off, size_t len, uint_t *protp,
    struct page *pl[], size_t plsz, struct seg *seg, caddr_t addr,
    enum seg_rw rw, struct cred *cr, caller_context_t *ctp)
{
	struct dcnode *dp = VTODC(vp);
	struct comphdr *hdr = dp->dc_hdr;
	struct page *pp, *plist = NULL;
	caddr_t vp_baddr;
	offset_t vp_boff, vp_bend;
	size_t bsize = hdr->ch_blksize;
	int nblks, error = 0;

	/* does not support write */
	if (rw == S_WRITE) {
		panic("write attempt on compressed file");
		/*NOTREACHED*/
	}

	if (protp != NULL)
		*protp = PROT_ALL & ~PROT_WRITE;

	/*
	 * We don't support asynchronous operation at the moment, so
	 * just pretend we did it.  If the pages are ever actually
	 * needed, they'll get brought in then.
	 */
	if (pl == NULL)
		return (0);

	/*
	 * Calc block start and end offsets
	 */
	vp_boff = rounddown(off, bsize);
	vp_bend = roundup(off + len, bsize);
	vp_baddr = (caddr_t)rounddown((uintptr_t)addr, bsize);

	nblks = (vp_bend - vp_boff) / bsize;
	while (nblks--) {
		error = dc_getblock(vp, vp_boff, bsize, &pp, seg, vp_baddr,
		    rw, cr);
		if (error)
			break;
		page_list_concat(&plist, &pp);
		vp_boff += bsize;
		vp_baddr += bsize;
	}
	if (!error)
		pvn_plist_init(plist, pl, plsz, off, len, rw);
	else
		pvn_read_done(plist, B_ERROR);
	return (error);
}
/*
 * This function should never be called.  We need to have it to pass
 * it as an argument to other functions.
 */
/*ARGSUSED*/
static int
dc_putapage(struct vnode *vp, struct page *pp, uoff_t *offp, size_t *lenp,
    int flags, struct cred *cr)
{
	/* should never happen */
	cmn_err(CE_PANIC, "dcfs: dc_putapage: dirty page");
	/*NOTREACHED*/
	return (0);
}
/*
 * The only flags we support are B_INVAL, B_FREE and B_DONTNEED.
 * B_INVAL is set by:
 *
 *	1) the MC_SYNC command of memcntl(2) to support the MS_INVALIDATE flag.
 *	2) the MC_ADVISE command of memcntl(2) with the MADV_DONTNEED advice
 *	   which translates to an MC_SYNC with the MS_INVALIDATE flag.
 *
 * The B_FREE (as well as the B_DONTNEED) flag is set when the
 * MADV_SEQUENTIAL advice has been used.  fop_putpage is invoked
 * from SEGVN to release pages behind a pagefault.
 */
/*ARGSUSED*/
static int
dc_putpage(struct vnode *vp, offset_t off, size_t len, int flags,
    struct cred *cr, caller_context_t *ctp)
{
	int error = 0;

	if (vp->v_count == 0) {
		panic("dcfs_putpage: bad v_count");
		/*NOTREACHED*/
	}

	if (vp->v_flag & VNOMAP)
		return (ENOSYS);

	if (!vn_has_cached_data(vp))	/* no pages mapped */
		return (0);

	if (len == 0)			/* from 'off' to EOF */
		error = pvn_vplist_dirty(vp, off, dc_putapage, flags, cr);
	else {
		offset_t io_off;
		se_t se = (flags & (B_INVAL | B_FREE)) ? SE_EXCL : SE_SHARED;

		for (io_off = off; io_off < off + len; io_off += PAGESIZE) {
			struct page *pp;

			/*
			 * We insist on getting the page only if we are
			 * about to invalidate, free or write it and
			 * the B_ASYNC flag is not set.
			 */
			if ((flags & B_INVAL) || ((flags & B_ASYNC) == 0))
				pp = page_lookup(&vp->v_object, io_off, se);
			else
				pp = page_lookup_nowait(&vp->v_object,
				    io_off, se);

			if (pp == NULL)
				continue;

			/*
			 * Normally pvn_getdirty() should return 0, which
			 * implies that it has done the job for us.
			 * The shouldn't-happen scenario is when it returns 1.
			 * This means that the page has been modified and
			 * needs to be put back.
			 * Since we can't write to a dcfs compressed file,
			 * we fake a failed I/O and force pvn_write_done()
			 * to destroy the page.
			 */
			if (pvn_getdirty(pp, flags) == 1) {
				cmn_err(CE_NOTE, "dc_putpage: dirty page");
				pvn_write_done(pp, flags |
				    B_ERROR | B_WRITE | B_INVAL | B_FORCE);
			}
		}
	}
	return (error);
}
static int
dc_map(struct vnode *vp, offset_t off, struct as *as, caddr_t *addrp,
    size_t len, uchar_t prot, uchar_t maxprot, uint_t flags,
    struct cred *cred, caller_context_t *ctp)
{
	struct vattr vattr;
	struct segvn_crargs vn_a;
	int error;

	if (vp->v_flag & VNOMAP)
		return (ENOSYS);

	if (off < 0 || (offset_t)(off + len) < 0)
		return (ENXIO);

	/*
	 * If file is being locked, disallow mapping.
	 */
	vattr.va_mask = VATTR_MODE;
	if (error = fop_getattr(VTODC(vp)->dc_subvp, &vattr, 0, cred, ctp))
		return (error);
	if (vn_has_mandatory_locks(vp, vattr.va_mode))
		return (EAGAIN);

	as_rangelock(as);

	if ((flags & MAP_FIXED) == 0) {
		map_addr(addrp, len, off, 1, flags);
		if (*addrp == NULL) {
			as_rangeunlock(as);
			return (ENOMEM);
		}
	} else {
		/*
		 * User specified address - blow away any previous mappings
		 */
		(void) as_unmap(as, *addrp, len);
	}

	vn_a.vp = vp;
	vn_a.offset = off;
	vn_a.type = flags & MAP_TYPE;
	vn_a.prot = prot;
	vn_a.maxprot = maxprot;
	vn_a.flags = flags & ~MAP_TYPE;
	vn_a.cred = cred;
	vn_a.amp = NULL;
	vn_a.szc = 0;
	vn_a.lgrp_mem_policy_flags = 0;

	error = as_map(as, *addrp, len, segvn_create, &vn_a);
	as_rangeunlock(as);
	return (error);
}
/*ARGSUSED*/
static int
dc_addmap(struct vnode *vp, offset_t off, struct as *as, caddr_t addr,
    size_t len, uchar_t prot, uchar_t maxprot, uint_t flags,
    struct cred *cr, caller_context_t *ctp)
{
	struct dcnode *dp;

	if (vp->v_flag & VNOMAP)
		return (ENOSYS);

	dp = VTODC(vp);
	mutex_enter(&dp->dc_lock);
	dp->dc_mapcnt += btopr(len);
	mutex_exit(&dp->dc_lock);
	return (0);
}
/*ARGSUSED*/
static int
dc_delmap(struct vnode *vp, offset_t off, struct as *as, caddr_t addr,
    size_t len, uint_t prot, uint_t maxprot, uint_t flags,
    struct cred *cr, caller_context_t *ctp)
{
	struct dcnode *dp;

	if (vp->v_flag & VNOMAP)
		return (ENOSYS);

	dp = VTODC(vp);
	mutex_enter(&dp->dc_lock);
	dp->dc_mapcnt -= btopr(len);
	ASSERT(dp->dc_mapcnt >= 0);
	mutex_exit(&dp->dc_lock);
	return (0);
}
/*
 * Constructor/destructor routines for dcnodes
 */
/*ARGSUSED1*/
static int
dcnode_constructor(void *buf, void *cdrarg, int kmflags)
{
	struct dcnode *dp = buf;
	struct vnode *vp;

	vp = dp->dc_vp = vn_alloc(kmflags);
	if (vp == NULL)
		return (-1);
	vp->v_data = dp;
	vp->v_type = VREG;
	vp->v_flag = VNOSWAP;
	vp->v_vfsp = &dc_vfs;
	vn_setops(vp, &dc_vnodeops);

	mutex_init(&dp->dc_lock, NULL, MUTEX_DEFAULT, NULL);
	dp->dc_mapcnt = 0;
	dp->dc_lrunext = dp->dc_lruprev = NULL;
	dp->dc_hdr = NULL;
	dp->dc_subvp = NULL;
	return (0);
}
/*ARGSUSED*/
static void
dcnode_destructor(void *buf, void *cdrarg)
{
	struct dcnode *dp = buf;
	struct vnode *vp = DCTOV(dp);

	mutex_destroy(&dp->dc_lock);

	VERIFY(dp->dc_hdr == NULL);
	VERIFY(dp->dc_subvp == NULL);
	vn_free(vp);
}
static struct dcnode *
dcnode_alloc(void)
{
	struct dcnode *dp;

	/*
	 * If the free list is above DCLRUSIZE
	 * re-use one from it
	 */
	mutex_enter(&dctable_lock);
	if (dclru_len < DCLRUSIZE) {
		mutex_exit(&dctable_lock);
		dp = kmem_cache_alloc(dcnode_cache, KM_SLEEP);
	} else {
		ASSERT(dclru != NULL);
		dp = dclru;
		dclru_sub(dp);
		dcdelete(dp);
		mutex_exit(&dctable_lock);
		dcnode_recycle(dp);
	}
	return (dp);
}
static void
dcnode_free(struct dcnode *dp)
{
	struct vnode *vp = DCTOV(dp);

	ASSERT(MUTEX_HELD(&dctable_lock));

	/*
	 * If no cached pages, no need to put it on lru
	 */
	if (!vn_has_cached_data(vp)) {
		dcdelete(dp);
		dcnode_recycle(dp);
		kmem_cache_free(dcnode_cache, dp);
		return;
	}

	/*
	 * Add to lru, if it's over the limit, free from head
	 */
	dclru_add(dp);
	if (dclru_len > DCLRUSIZE) {
		dp = dclru;
		dclru_sub(dp);
		dcdelete(dp);
		dcnode_recycle(dp);
		kmem_cache_free(dcnode_cache, dp);
	}
}
static void
dcnode_recycle(struct dcnode *dp)
{
	struct vnode *vp = DCTOV(dp);

	VN_RELE(dp->dc_subvp);
	dp->dc_subvp = NULL;
	(void) pvn_vplist_dirty(vp, 0, dc_putapage, B_INVAL, NULL);
	kmem_free(dp->dc_hdr, dp->dc_hdrsize);
	dp->dc_hdr = NULL;
	dp->dc_hdrsize = dp->dc_zmax = 0;
	dp->dc_bufcache = NULL;
	dp->dc_mapcnt = 0;
	vn_reinit(vp);
	vp->v_type = VREG;
	vp->v_flag = VNOSWAP;
	vp->v_vfsp = &dc_vfs;
}
static int
dcinit(int fstype, char *name)
{
	int error;
	major_t dev;

	error = vfs_setfsops(fstype, &dc_vfsops);
	if (error != 0) {
		cmn_err(CE_WARN, "dcinit: bad fstype");
		return (error);
	}
	VFS_INIT(&dc_vfs, &dc_vfsops, NULL);
	dc_vfs.vfs_flag = VFS_RDONLY;
	dc_vfs.vfs_fstype = fstype;
	if ((dev = getudev()) == (major_t)-1)
		dev = 0;
	dcdev = makedevice(dev, 0);
	dc_vfs.vfs_dev = dcdev;

	mutex_init(&dctable_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&dccache_lock, NULL, MUTEX_DEFAULT, NULL);
	dcnode_cache = kmem_cache_create("dcnode_cache", sizeof (struct dcnode),
	    0, dcnode_constructor, dcnode_destructor, NULL, NULL, NULL, 0);

	return (0);
}
/*
 * Return shadow vnode with the given vp as its subordinate
 */
struct vnode *
decompvp(struct vnode *vp, cred_t *cred, caller_context_t *ctp)
{
	struct dcnode *dp, *ndp;
	struct comphdr thdr, *hdr;
	struct kmem_cache **cpp;
	struct vattr vattr;
	size_t hdrsize, bsize;
	int error;

	/*
	 * See if we have an existing shadow
	 * If none, we have to manufacture one
	 */
	mutex_enter(&dctable_lock);
	dp = dcfind(vp);
	mutex_exit(&dctable_lock);
	if (dp != NULL)
		return (DCTOV(dp));

	/*
	 * Make sure it's a valid compressed file
	 */
	hdr = &thdr;
	error = vn_rdwr(UIO_READ, vp, (caddr_t)hdr, sizeof (struct comphdr), 0,
	    UIO_SYSSPACE, 0, 0, cred, NULL);
	if (error || hdr->ch_magic != CH_MAGIC_ZLIB ||
	    hdr->ch_version != CH_VERSION || hdr->ch_algorithm != CH_ALG_ZLIB ||
	    hdr->ch_fsize == 0 || hdr->ch_blksize < PAGESIZE ||
	    hdr->ch_blksize > ptob(DCCACHESIZE) || !ISP2(hdr->ch_blksize))
		return (NULL);

	/* get underlying file size */
	if (fop_getattr(vp, &vattr, 0, cred, ctp) != 0)
		return (NULL);

	/*
	 * Re-read entire header
	 */
	hdrsize = hdr->ch_blkmap[0] + sizeof (uint64_t);
	hdr = kmem_alloc(hdrsize, KM_SLEEP);
	error = vn_rdwr(UIO_READ, vp, (caddr_t)hdr, hdrsize, 0, UIO_SYSSPACE,
	    0, 0, cred, NULL);
	if (error) {
		kmem_free(hdr, hdrsize);
		return (NULL);
	}

	/*
	 * add extra blkmap entry to make dc_getblock()'s
	 * life easier
	 */
	bsize = hdr->ch_blksize;
	hdr->ch_blkmap[((hdr->ch_fsize - 1) / bsize) + 1] = vattr.va_size;

	ndp = dcnode_alloc();
	ndp->dc_subvp = vp;
	VN_HOLD(vp);
	ndp->dc_hdr = hdr;
	ndp->dc_hdrsize = hdrsize;

	/*
	 * Allocate kmem cache if none there already
	 */
	ndp->dc_zmax = ZMAXBUF(bsize);
	cpp = &dcbuf_cache[btop(bsize)];
	mutex_enter(&dccache_lock);
	if (*cpp == NULL)
		*cpp = kmem_cache_create("dcbuf_cache", ndp->dc_zmax, 0, NULL,
		    NULL, NULL, NULL, NULL, 0);
	mutex_exit(&dccache_lock);
	ndp->dc_bufcache = *cpp;

	/*
	 * Recheck table in case someone else created shadow
	 * while we were blocked above.
	 */
	mutex_enter(&dctable_lock);
	dp = dcfind(vp);
	if (dp != NULL) {
		mutex_exit(&dctable_lock);
		dcnode_recycle(ndp);
		kmem_cache_free(dcnode_cache, ndp);
		return (DCTOV(dp));
	}
	dcinsert(ndp);
	mutex_exit(&dctable_lock);

	return (DCTOV(ndp));
}
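
/*
 * A sizing note (an assumption, not taken from this file): ZMAXBUF(bsize),
 * defined in sys/fs/decomp.h, must be at least zlib's worst-case compressed
 * size for bsize bytes of input, on the order of bsize + bsize/1000 + 12
 * (cf. zlib's compressBound()), since the dcbuf cache allocated above
 * stages an entire compressed block before z_uncompress() runs.
 */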
/*
 * dcnode lookup table
 * These routines maintain a table of dcnodes hashed by their
 * subordinate vnode so that they can be found if they already
 * exist in the vnode cache
 */

/*
 * Put a dcnode in the table.
 */
static void
dcinsert(struct dcnode *newdp)
{
	int idx = DCHASH(newdp->dc_subvp);

	ASSERT(MUTEX_HELD(&dctable_lock));
	newdp->dc_hash = dctable[idx];
	dctable[idx] = newdp;
}
/*
 * Remove a dcnode from the hash table.
 */
static void
dcdelete(struct dcnode *deldp)
{
	int idx = DCHASH(deldp->dc_subvp);
	struct dcnode *dp, *prevdp;

	ASSERT(MUTEX_HELD(&dctable_lock));
	dp = dctable[idx];
	if (dp == deldp)
		dctable[idx] = dp->dc_hash;
	else {
		for (prevdp = dp, dp = dp->dc_hash; dp != NULL;
		    prevdp = dp, dp = dp->dc_hash) {
			if (dp == deldp) {
				prevdp->dc_hash = dp->dc_hash;
				break;
			}
		}
	}
	ASSERT(dp != NULL);
}
/*
 * Find a shadow vnode in the dctable hash list.
 */
static struct dcnode *
dcfind(struct vnode *vp)
{
	struct dcnode *dp;

	ASSERT(MUTEX_HELD(&dctable_lock));
	for (dp = dctable[DCHASH(vp)]; dp != NULL; dp = dp->dc_hash)
		if (dp->dc_subvp == vp) {
			if (DCTOV(dp)->v_count == 0)
				dclru_sub(dp);
			VN_HOLD(DCTOV(dp));
			return (dp);
		}
	return (NULL);
}
#ifdef	DEBUG
static int
dclru_count(void)
{
	struct dcnode *dp;
	int i = 0;

	if (dclru == NULL)
		return (0);
	for (dp = dclru; dp->dc_lrunext != dclru; dp = dp->dc_lrunext)
		i++;
	return (i + 1);
}
#endif
static void
dclru_add(struct dcnode *dp)
{
	/*
	 * Add to dclru as double-link chain
	 */
	ASSERT(MUTEX_HELD(&dctable_lock));
	if (dclru == NULL) {
		dclru = dp;
		dp->dc_lruprev = dp->dc_lrunext = dp;
	} else {
		struct dcnode *last = dclru->dc_lruprev;

		dclru->dc_lruprev = dp;
		last->dc_lrunext = dp;
		dp->dc_lruprev = last;
		dp->dc_lrunext = dclru;
	}
	dclru_len++;
	ASSERT(dclru_len == dclru_count());
}
static void
dclru_sub(struct dcnode *dp)
{
	ASSERT(MUTEX_HELD(&dctable_lock));
	dp->dc_lrunext->dc_lruprev = dp->dc_lruprev;
	dp->dc_lruprev->dc_lrunext = dp->dc_lrunext;
	if (dp == dclru)
		dclru = dp->dc_lrunext == dp ? NULL : dp->dc_lrunext;
	dp->dc_lrunext = dp->dc_lruprev = NULL;
	dclru_len--;
	ASSERT(dclru_len == dclru_count());
}