/*
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 */

/*
 * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
 *
 * Copyright 2013 Nexenta Systems, Inc. All rights reserved.
 * Copyright (c) 2016 Andrey Sokolov
 * Copyright 2016 Toomas Soome <tsoome@me.com>
 */
/*
 * lofi (loopback file) driver - allows you to attach a file to a device,
 * which can then be accessed through that device. The simple model is that
 * you tell lofi to open a file, and then use the block device you get as
 * you would any block device. lofi translates access to the block device
 * into I/O on the underlying file. This is mostly useful for
 * mounting images of filesystems.
 *
 * lofi is controlled through /dev/lofictl - this is the only device exported
 * during attach, and is instance number 0. lofiadm communicates with lofi
 * through ioctls on this device. When a file is attached to lofi, block and
 * character devices are exported in /dev/lofi and /dev/rlofi. These devices
 * are identified by lofi instance number, and the instance number is also used
 * as the name in /dev/lofi.
 *
 * Virtual disks, or labeled lofi, implement virtual disk support to
 * support partition tables and related tools. Such mappings will cause
 * block and character devices to be exported in the /dev/dsk and /dev/rdsk
 * directories.
 *
 * To support virtual disks, the instance number space is divided into two
 * parts: the upper part for the instance number and the lower part for the
 * minor number space used to identify partitions and slices. The virtual
 * disk support is implemented by stacking the cmlb module. For virtual
 * disks, the partition-related ioctl calls are routed to the cmlb module.
 * Compression and encryption are not supported for virtual disks.
 *
 * Mapped devices are tracked with state structures handled with
 * ddi_soft_state(9F) for simplicity.
 *
 * A file attached to lofi is opened when attached and not closed until
 * explicitly detached from lofi. This seems more sensible than deferring
 * the open until the /dev/lofi device is opened, for a number of reasons.
 * One is that any failure is likely to be noticed by the person (or script)
 * running lofiadm. Another is that it would be a security problem if the
 * file was replaced by another one after being added but before being opened.
 *
 * The only hard part about lofi is the ioctls. In order to support things
 * like 'newfs' on a lofi device, it needs to support certain disk ioctls.
 * So it has to fake disk geometry and partition information. More may need
 * to be faked if your favorite utility doesn't work and you think it should
 * (fdformat doesn't work because it really wants to know the type of floppy
 * controller to talk to, and that didn't seem easy to fake. Or possibly even
 * necessary, since we have mkfs_pcfs now).
 *
 * Normally, a lofi device cannot be detached if it is open (i.e. busy). To
 * support simulation of hotplug events, an optional force flag is provided.
 * If a lofi device is open when a force detach is requested, then the
 * underlying file is closed and any subsequent operations return EIO. When the
 * device is closed for the last time, it will be cleaned up at that time. In
 * addition, the DKIOCSTATE ioctl will return DKIO_DEV_GONE when the device is
 * detached but not removed.
 *
 * Known problems:
 *
 *	UFS logging. Mounting a UFS filesystem image "logging"
 *	works for basic copy testing but wedges during a build of ON through
 *	that image. Some deadlock in lufs holding the log mutex and then
 *	getting stuck on a buf. So for now, don't do that.
 *
 *	Direct I/O. Since the filesystem data is being cached in the buffer
 *	cache, _and_ again in the underlying filesystem, it's tempting to
 *	enable direct I/O on the underlying file. Don't, because that
 *	deadlocks. I think to fix the cache-twice problem we might need
 *	filesystem support.
 *
 * Interesting things to do:
 *
 *	Allow multiple files for each device. A poor-man's metadisk,
 *	basically.
 *
 *	Pass-through ioctls on block devices. You can (though it's not
 *	documented) give lofi a block device as a file name. Then we shouldn't
 *	need to fake a geometry. However, it may be relevant if you're
 *	replacing metadisk, or using lofi to get crypto.
 *	It makes sense to do lofiadm -c aes -a /dev/dsk/c0t0d0s4 /dev/lofi/1
 *	and then in /etc/vfstab have an entry for /dev/lofi/1 as /export/home.
 *	In fact this even makes sense if you have lofi "above" metadisk.
 *
 * Encryption:
 *	Each lofi device can have its own symmetric key and cipher.
 *	They are passed to us by lofiadm(1M) in the correct format for use
 *	with the misc/kcf crypto_* routines.
 *
 *	Each block has its own IV, that is calculated in lofi_blk_mech(),
 *	based on the "master" key held in the lsp and the block number of
 *	the buffer.
 */
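/*
 * Illustrative sketch (not part of the driver) of the per-block IV
 * derivation implemented by lofi_blk_mech() below, for block 42 and a
 * 16-byte IV: the block number is zero-padded on the left to the IV
 * length and the buffer is encrypted in place with the IV mechanism:
 *
 *	longlong_t lblkno = 42;
 *	char iv[16];
 *	bzero(iv, sizeof (iv));
 *	bcopy(&lblkno, iv + sizeof (iv) - sizeof (lblkno), sizeof (lblkno));
 *	SETUP_C_DATA(cdata, iv, sizeof (iv));
 *	ret = crypto_encrypt(&lsp->ls_iv_mech, &cdata, &lsp->ls_key,
 *	    NULL, NULL, NULL);
 *
 * The encrypted buffer then becomes the data mechanism's IV for that
 * block (lsp->ls_mech.cm_param).
 */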
#include <sys/types.h>
#include <netinet/in.h>
#include <sys/sysmacros.h>
#include <sys/kmem.h>
#include <sys/cred.h>
#include <sys/mman.h>
#include <sys/errno.h>
#include <sys/aio_req.h>
#include <sys/stat.h>
#include <sys/file.h>
#include <sys/modctl.h>
#include <sys/conf.h>
#include <sys/debug.h>
#include <sys/vnode.h>
#include <sys/lofi.h>
#include <sys/fcntl.h>
#include <sys/pathname.h>
#include <sys/filio.h>
#include <sys/fdio.h>
#include <sys/open.h>
#include <sys/disp.h>
#include <vm/seg_map.h>
#include <sys/sunddi.h>
#include <sys/zmod.h>
#include <sys/id_space.h>
#include <sys/mkdev.h>
#include <sys/crypto/common.h>
#include <sys/crypto/api.h>
#include <sys/rctl.h>
#include <sys/vtoc.h>
#include <sys/scsi/scsi.h>	/* for DTYPE_DIRECT */
#include <sys/scsi/impl/uscsi.h>
#include <sys/sysevent/dev.h>
#define	NBLOCKS_PROP_NAME	"Nblocks"
#define	SIZE_PROP_NAME		"Size"
#define	ZONE_PROP_NAME		"zone"

#define	SETUP_C_DATA(cd, buf, len)		\
	(cd).cd_format = CRYPTO_DATA_RAW;	\
	(cd).cd_offset = 0;			\
	(cd).cd_miscdata = NULL;		\
	(cd).cd_length = (len);			\
	(cd).cd_raw.iov_base = (buf);		\
	(cd).cd_raw.iov_len = (len);
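/*
 * Usage sketch: SETUP_C_DATA() initializes a raw-format crypto_data_t
 * over a caller-supplied buffer, e.g.
 *
 *	crypto_data_t cdata;
 *	SETUP_C_DATA(cdata, bufaddr, DEV_BSIZE);
 *
 * after which cdata can be handed to crypto_encrypt()/crypto_decrypt(),
 * as done in lofi_blk_mech() and lofi_crypto() below.
 */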
#define	UIO_CHECK(uio)	\
	if (((uio)->uio_loffset % DEV_BSIZE) != 0 || \
	    ((uio)->uio_resid % DEV_BSIZE) != 0) { \
		return (EINVAL); \
	}
#define	DEVFS_CHANNEL	"devfsadm_event_channel"
#define	LOFI_TIMEOUT	30
static evchan_t *lofi_chan;
static kmutex_t lofi_chan_lock;
static kcondvar_t lofi_chan_cv;
static nvlist_t *lofi_devlink_cache;

static void *lofi_statep;
static kmutex_t lofi_lock;		/* state lock */
static id_space_t *lofi_id;		/* lofi ID values */
static list_t lofi_list;
static zone_key_t lofi_zone_key;
/*
 * Because lofi_taskq_nthreads limits the actual swamping of the device, the
 * maxalloc parameter (lofi_taskq_maxalloc) should be tuned conservatively
 * high. If we want to be assured that the underlying device is always busy,
 * we must be sure that the number of bytes enqueued when the number of
 * enqueued tasks exceeds maxalloc is sufficient to keep the device busy for
 * the duration of the sleep time in taskq_ent_alloc(). That is, lofi should
 * set maxalloc to be the maximum throughput (in bytes per second) of the
 * underlying device divided by the minimum I/O size. We assume a realistic
 * maximum throughput of one hundred megabytes per second; we set maxalloc on
 * the lofi task queue to be 104857600 divided by DEV_BSIZE.
 */
static int lofi_taskq_maxalloc = 104857600 / DEV_BSIZE;
static int lofi_taskq_nthreads = 4;	/* # of taskq threads per device */
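/*
 * Worked example: with the usual DEV_BSIZE of 512 bytes, maxalloc is
 * 104857600 / 512 = 204800 queued tasks, i.e. one second's worth of
 * minimum-sized I/Os at the assumed 100 MB/s device throughput.
 */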
const char lofi_crypto_magic[6] = LOFI_CRYPTO_MAGIC;
/*
 * To avoid decompressing data in a compressed segment multiple times
 * when accessing small parts of a segment's data, we cache and reuse
 * the uncompressed segment's data.
 *
 * A single cached segment is sufficient to avoid lots of duplicate
 * segment decompress operations. A small cache size also reduces the
 * memory footprint.
 *
 * lofi_max_comp_cache is the maximum number of decompressed data segments
 * cached for each compressed lofi image. It can be set to 0 to disable
 * caching.
 */
uint32_t lofi_max_comp_cache = 1;
static int gzip_decompress(void *src, size_t srclen, void *dst,
    size_t *dstlen, int level);

static int lzma_decompress(void *src, size_t srclen, void *dst,
    size_t *dstlen, int level);
lofi_compress_info_t lofi_compress_table[LOFI_COMPRESS_FUNCTIONS] = {
	{gzip_decompress,	NULL,	6,	"gzip"},	/* default */
	{gzip_decompress,	NULL,	6,	"gzip-6"},
	{gzip_decompress,	NULL,	9,	"gzip-9"},
	{lzma_decompress,	NULL,	0,	"lzma"}
};
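/*
 * Note: lsp->ls_comp_algorithm_index is used as an index into this
 * table; lofi_strategy_task() below picks the entry and applies its
 * l_decompress routine with l_level to each compressed segment.
 */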
static void lofi_strategy_task(void *);
static int lofi_tg_rdwr(dev_info_t *, uchar_t, void *, diskaddr_t,
    size_t, void *);
static int lofi_tg_getinfo(dev_info_t *, int, void *, void *);

struct cmlb_tg_ops lofi_tg_ops = {
	TG_DK_OPS_VERSION_1,
	lofi_tg_rdwr,
	lofi_tg_getinfo
};
/*ARGSUSED*/
static void *
SzAlloc(void *p, size_t size)
{
	return (kmem_alloc(size, KM_SLEEP));
}

/*ARGSUSED*/
static void
SzFree(void *p, void *address, size_t size)
{
	kmem_free(address, size);
}

static ISzAlloc g_Alloc = { SzAlloc, SzFree };
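/*
 * The two wrappers above adapt kmem_alloc()/kmem_free() to the ISzAlloc
 * allocator interface that the LZMA SDK expects; g_Alloc is handed to
 * LzmaDecode() in lzma_decompress() below.
 */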
/*
 * Free data referenced by the linked list of cached uncompressed
 * segments.
 */
static void
lofi_free_comp_cache(struct lofi_state *lsp)
{
	struct lofi_comp_cache *lc;

	while ((lc = list_remove_head(&lsp->ls_comp_cache)) != NULL) {
		kmem_free(lc->lc_data, lsp->ls_uncomp_seg_sz);
		kmem_free(lc, sizeof (struct lofi_comp_cache));
		lsp->ls_comp_cache_count--;
	}
	ASSERT(lsp->ls_comp_cache_count == 0);
}
static boolean_t
is_opened(struct lofi_state *lsp)
{
	int i;
	boolean_t last = B_TRUE;

	ASSERT(MUTEX_HELD(&lofi_lock));
	for (i = 0; i < LOFI_PART_MAX; i++) {
		if (lsp->ls_open_lyr[i]) {
			last = B_FALSE;
			break;
		}
	}

	for (i = 0; last && (i < OTYP_LYR); i++) {
		if (lsp->ls_open_reg[i]) {
			last = B_FALSE;
		}
	}

	return (!last);
}
static void
lofi_free_crypto(struct lofi_state *lsp)
{
	ASSERT(MUTEX_HELD(&lofi_lock));

	if (lsp->ls_crypto_enabled) {
		/*
		 * Clean up the crypto state so that it doesn't hang around
		 * in memory after we are done with it.
		 */
		if (lsp->ls_key.ck_data != NULL) {
			bzero(lsp->ls_key.ck_data,
			    CRYPTO_BITS2BYTES(lsp->ls_key.ck_length));
			kmem_free(lsp->ls_key.ck_data,
			    CRYPTO_BITS2BYTES(lsp->ls_key.ck_length));
			lsp->ls_key.ck_data = NULL;
			lsp->ls_key.ck_length = 0;
		}

		if (lsp->ls_mech.cm_param != NULL) {
			kmem_free(lsp->ls_mech.cm_param,
			    lsp->ls_mech.cm_param_len);
			lsp->ls_mech.cm_param = NULL;
			lsp->ls_mech.cm_param_len = 0;
		}

		if (lsp->ls_iv_mech.cm_param != NULL) {
			kmem_free(lsp->ls_iv_mech.cm_param,
			    lsp->ls_iv_mech.cm_param_len);
			lsp->ls_iv_mech.cm_param = NULL;
			lsp->ls_iv_mech.cm_param_len = 0;
		}

		mutex_destroy(&lsp->ls_crypto_lock);
	}
}
static int
lofi_tg_rdwr(dev_info_t *dip, uchar_t cmd, void *bufaddr, diskaddr_t start,
    size_t length, void *tg_cookie)
{
	struct lofi_state *lsp;
	struct buf *bp;
	int instance;
	int rv = 0;

	instance = ddi_get_instance(dip);
	if (instance == 0)	/* control node does not have disk */
		return (ENXIO);

	lsp = ddi_get_soft_state(lofi_statep, instance);
	if (lsp == NULL)
		return (ENXIO);

	if (cmd != TG_READ && cmd != TG_WRITE)
		return (EINVAL);

	/*
	 * Make sure the mapping is set up by checking lsp->ls_vp_ready.
	 */
	mutex_enter(&lsp->ls_vp_lock);
	while (lsp->ls_vp_ready == B_FALSE)
		cv_wait(&lsp->ls_vp_cv, &lsp->ls_vp_lock);
	mutex_exit(&lsp->ls_vp_lock);

	if (P2PHASE(length, (1U << lsp->ls_lbshift)) != 0) {
		/* We can only transfer whole blocks at a time! */
		return (EINVAL);
	}

	bp = getrbuf(KM_SLEEP);

	if (cmd == TG_READ) {
		bp->b_flags = B_READ;
	} else {
		if (lsp->ls_readonly == B_TRUE) {
			freerbuf(bp);
			return (EROFS);
		}
		bp->b_flags = B_WRITE;
	}

	bp->b_un.b_addr = bufaddr;
	bp->b_bcount = length;
	bp->b_lblkno = start;
	bp->b_private = NULL;
	bp->b_edev = lsp->ls_dev;

	if (lsp->ls_kstat) {
		mutex_enter(lsp->ls_kstat->ks_lock);
		kstat_waitq_enter(KSTAT_IO_PTR(lsp->ls_kstat));
		mutex_exit(lsp->ls_kstat->ks_lock);
	}
	(void) taskq_dispatch(lsp->ls_taskq, lofi_strategy_task, bp, KM_SLEEP);
	(void) biowait(bp);

	rv = geterror(bp);
	freerbuf(bp);
	return (rv);
}
/*
 * Get device geometry info for cmlb.
 *
 * We have mapped the disk image as a virtual block device and have to
 * report physical/virtual geometry to cmlb.
 *
 * So we have two principal cases:
 * 1. Uninitialised image without any existing labels;
 *    in this case we fabricate the data based on the mapped image.
 * 2. Image with existing label information.
 *    Since we have no information on how the image was created (it may be
 *    a dump from some physical device), we need to rely on the label
 *    information from the image, or we get "corrupted label" errors.
 *    NOTE: label can be MBR, MBR+SMI, GPT.
 */
static int
lofi_tg_getinfo(dev_info_t *dip, int cmd, void *arg, void *tg_cookie)
{
	struct lofi_state *lsp;
	int instance;
	int ashift;

	_NOTE(ARGUNUSED(tg_cookie));
	instance = ddi_get_instance(dip);
	if (instance == 0)	/* control device has no storage */
		return (ENXIO);

	lsp = ddi_get_soft_state(lofi_statep, instance);
	if (lsp == NULL)
		return (ENXIO);

	/*
	 * Make sure the mapping is set up by checking lsp->ls_vp_ready.
	 *
	 * When a mapping is created, a new lofi instance is created and
	 * lofi_attach() will call cmlb_attach() as part of the procedure
	 * to set the mapping up. This chain of events will happen in
	 * the same thread.
	 * Since cmlb_attach() will call lofi_tg_getinfo to get
	 * capacity, we return error on that call if cookie is set,
	 * otherwise lofi_attach will be stuck as the mapping is not yet
	 * finalized and lofi is not yet ready.
	 * Note, such error is not fatal for cmlb, as the label setup
	 * will be finalized when cmlb_validate() is called.
	 */
	mutex_enter(&lsp->ls_vp_lock);
	if (tg_cookie != NULL && lsp->ls_vp_ready == B_FALSE) {
		mutex_exit(&lsp->ls_vp_lock);
		return (ENXIO);
	}
	while (lsp->ls_vp_ready == B_FALSE)
		cv_wait(&lsp->ls_vp_cv, &lsp->ls_vp_lock);
	mutex_exit(&lsp->ls_vp_lock);

	ashift = lsp->ls_lbshift;

	switch (cmd) {
	case TG_GETPHYGEOM: {
		cmlb_geom_t *geomp = arg;

		geomp->g_capacity =
		    (lsp->ls_vp_size - lsp->ls_crypto_offset) >> ashift;
		geomp->g_nsect = lsp->ls_dkg.dkg_nsect;
		geomp->g_nhead = lsp->ls_dkg.dkg_nhead;
		geomp->g_acyl = lsp->ls_dkg.dkg_acyl;
		geomp->g_ncyl = lsp->ls_dkg.dkg_ncyl;
		geomp->g_secsize = (1U << ashift);
		geomp->g_intrlv = lsp->ls_dkg.dkg_intrlv;
		geomp->g_rpm = lsp->ls_dkg.dkg_rpm;
		return (0);
	}

	case TG_GETCAPACITY:
		*(diskaddr_t *)arg =
		    (lsp->ls_vp_size - lsp->ls_crypto_offset) >> ashift;
		return (0);

	case TG_GETBLOCKSIZE:
		*(uint32_t *)arg = (1U << ashift);
		return (0);

	case TG_GETATTR: {
		tg_attribute_t *tgattr = arg;

		tgattr->media_is_writable = !lsp->ls_readonly;
		tgattr->media_is_solid_state = B_FALSE;
		return (0);
	}

	default:
		return (EINVAL);
	}
}
static void
lofi_destroy(struct lofi_state *lsp, cred_t *credp)
{
	int id = LOFI_MINOR2ID(getminor(lsp->ls_dev));
	int i;

	ASSERT(MUTEX_HELD(&lofi_lock));

	list_remove(&lofi_list, lsp);

	lofi_free_crypto(lsp);

	/*
	 * Free pre-allocated compressed buffers
	 */
	if (lsp->ls_comp_bufs != NULL) {
		for (i = 0; i < lofi_taskq_nthreads; i++) {
			if (lsp->ls_comp_bufs[i].bufsize > 0)
				kmem_free(lsp->ls_comp_bufs[i].buf,
				    lsp->ls_comp_bufs[i].bufsize);
		}
		kmem_free(lsp->ls_comp_bufs,
		    sizeof (struct compbuf) * lofi_taskq_nthreads);
	}

	if (lsp->ls_vp != NULL) {
		(void) VOP_PUTPAGE(lsp->ls_vp, 0, 0, B_INVAL, credp, NULL);
		(void) VOP_CLOSE(lsp->ls_vp, lsp->ls_openflag,
		    1, 0, credp, NULL);
		VN_RELE(lsp->ls_vp);
	}
	if (lsp->ls_stacked_vp != lsp->ls_vp)
		VN_RELE(lsp->ls_stacked_vp);

	if (lsp->ls_taskq != NULL)
		taskq_destroy(lsp->ls_taskq);

	if (lsp->ls_kstat != NULL)
		kstat_delete(lsp->ls_kstat);

	/*
	 * Free cached decompressed segment data
	 */
	lofi_free_comp_cache(lsp);
	list_destroy(&lsp->ls_comp_cache);

	if (lsp->ls_uncomp_seg_sz > 0) {
		kmem_free(lsp->ls_comp_index_data, lsp->ls_comp_index_data_sz);
		lsp->ls_uncomp_seg_sz = 0;
	}

	rctl_decr_lofi(lsp->ls_zone.zref_zone, 1);
	zone_rele_ref(&lsp->ls_zone, ZONE_REF_LOFI);

	mutex_destroy(&lsp->ls_comp_cache_lock);
	mutex_destroy(&lsp->ls_comp_bufs_lock);
	mutex_destroy(&lsp->ls_kstat_lock);
	mutex_destroy(&lsp->ls_vp_lock);
	cv_destroy(&lsp->ls_vp_cv);
	lsp->ls_vp_ready = B_FALSE;

	ASSERT(ddi_get_soft_state(lofi_statep, id) == lsp);
	(void) ndi_devi_offline(lsp->ls_dip, NDI_DEVI_REMOVE);
	id_free(lofi_id, id);
}
static void
lofi_free_dev(struct lofi_state *lsp)
{
	ASSERT(MUTEX_HELD(&lofi_lock));

	if (lsp->ls_cmlbhandle != NULL) {
		cmlb_invalidate(lsp->ls_cmlbhandle, 0);
		cmlb_detach(lsp->ls_cmlbhandle, 0);
		cmlb_free_handle(&lsp->ls_cmlbhandle);
		lsp->ls_cmlbhandle = NULL;
	}
	(void) ddi_prop_remove_all(lsp->ls_dip);
	ddi_remove_minor_node(lsp->ls_dip, NULL);
}
/*ARGSUSED*/
static void
lofi_zone_shutdown(zoneid_t zoneid, void *arg)
{
	struct lofi_state *lsp;
	struct lofi_state *next;

	mutex_enter(&lofi_lock);

	for (lsp = list_head(&lofi_list); lsp != NULL; lsp = next) {

		/* lofi_destroy() frees lsp */
		next = list_next(&lofi_list, lsp);

		if (lsp->ls_zone.zref_zone->zone_id != zoneid)
			continue;

		/*
		 * No in-zone processes are running, but something has this
		 * open. It's either a global zone process, or a lofi
		 * mount. In either case we set ls_cleanup so the last
		 * user destroys the device.
		 */
		if (is_opened(lsp)) {
			lsp->ls_cleanup = B_TRUE;
		} else {
			lofi_free_dev(lsp);
			lofi_destroy(lsp, kcred);
		}
	}

	mutex_exit(&lofi_lock);
}
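/*
 * Minor number layout sketch (assuming the LOFI_MINOR2ID/LOFI_PART
 * macros from sys/lofi.h): the upper bits of a minor number carry the
 * lofi instance id and the low bits select the partition/slice, which
 * is how a single mapped image exposes multiple nodes through cmlb.
 * lofi_open()/lofi_close() below decompose the minor with exactly
 * those two macros.
 */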
/*ARGSUSED*/
static int
lofi_open(dev_t *devp, int flag, int otyp, struct cred *credp)
{
	int id;
	minor_t	part;
	int mask;
	diskaddr_t nblks;
	diskaddr_t lba;
	boolean_t ndelay;
	struct lofi_state *lsp;

	ndelay = (flag & (FNDELAY | FNONBLOCK)) ? B_TRUE : B_FALSE;

	/*
	 * lofiadm -a /dev/lofi/1 gets us here.
	 */
	if (mutex_owner(&lofi_lock) == curthread)
		return (EINVAL);

	mutex_enter(&lofi_lock);

	id = LOFI_MINOR2ID(getminor(*devp));
	part = LOFI_PART(getminor(*devp));
	mask = (1U << part);

	/* master control device */
	if (id == 0) {
		mutex_exit(&lofi_lock);
		return (0);
	}

	/* otherwise, the mapping should already exist */
	lsp = ddi_get_soft_state(lofi_statep, id);
	if (lsp == NULL) {
		mutex_exit(&lofi_lock);
		return (EINVAL);
	}

	if (lsp->ls_vp == NULL) {
		mutex_exit(&lofi_lock);
		return (ENXIO);
	}

	if (lsp->ls_readonly && (flag & FWRITE)) {
		mutex_exit(&lofi_lock);
		return (EROFS);
	}

	if ((lsp->ls_open_excl) & (mask)) {
		mutex_exit(&lofi_lock);
		return (EBUSY);
	}

	if (flag & FEXCL) {
		if (lsp->ls_open_lyr[part]) {
			mutex_exit(&lofi_lock);
			return (EBUSY);
		}
		for (int i = 0; i < OTYP_LYR; i++) {
			if (lsp->ls_open_reg[i] & mask) {
				mutex_exit(&lofi_lock);
				return (EBUSY);
			}
		}
	}

	if (lsp->ls_cmlbhandle != NULL) {
		if (cmlb_validate(lsp->ls_cmlbhandle, 0, 0) != 0) {
			/*
			 * non-blocking opens are allowed to succeed to
			 * support format and fdisk to create partitioning.
			 */
			if (!ndelay) {
				mutex_exit(&lofi_lock);
				return (ENXIO);
			}
		} else if (cmlb_partinfo(lsp->ls_cmlbhandle, part, &nblks, &lba,
		    NULL, NULL, 0) == 0) {
			if ((!nblks) && ((!ndelay) || (otyp != OTYP_CHR))) {
				mutex_exit(&lofi_lock);
				return (ENXIO);
			}
		} else if (!ndelay) {
			mutex_exit(&lofi_lock);
			return (ENXIO);
		}
	}

	if (otyp == OTYP_LYR) {
		lsp->ls_open_lyr[part]++;
	} else {
		lsp->ls_open_reg[otyp] |= mask;
	}
	if (flag & FEXCL) {
		lsp->ls_open_excl |= mask;
	}

	mutex_exit(&lofi_lock);
	return (0);
}
/*ARGSUSED*/
static int
lofi_close(dev_t dev, int flag, int otyp, struct cred *credp)
{
	minor_t	part;
	int id;
	int mask;
	struct lofi_state *lsp;

	id = LOFI_MINOR2ID(getminor(dev));
	part = LOFI_PART(getminor(dev));
	mask = (1U << part);

	mutex_enter(&lofi_lock);
	lsp = ddi_get_soft_state(lofi_statep, id);
	if (lsp == NULL) {
		mutex_exit(&lofi_lock);
		return (EINVAL);
	}

	if (id == 0) {
		mutex_exit(&lofi_lock);
		return (0);
	}

	if (lsp->ls_open_excl & mask)
		lsp->ls_open_excl &= ~mask;

	if (otyp == OTYP_LYR) {
		lsp->ls_open_lyr[part]--;
	} else {
		lsp->ls_open_reg[otyp] &= ~mask;
	}

	/*
	 * If we forcibly closed the underlying device (li_force), or
	 * asked for cleanup (li_cleanup), finish up if we're the last
	 * one out of the door.
	 */
	if (!is_opened(lsp) && (lsp->ls_cleanup || lsp->ls_vp == NULL)) {
		lofi_free_dev(lsp);
		lofi_destroy(lsp, credp);
	}

	mutex_exit(&lofi_lock);
	return (0);
}
/*
 * Sets the mechanism's initialization vector (IV) if one is needed.
 * The IV is computed from the data block number. lsp->ls_mech is
 * altered so that:
 *	lsp->ls_mech.cm_param_len is set to the IV len.
 *	lsp->ls_mech.cm_param is set to the IV.
 */
static int
lofi_blk_mech(struct lofi_state *lsp, longlong_t lblkno)
{
	int	ret;
	crypto_data_t cdata;
	char	*iv;
	size_t	iv_len;
	size_t	min;
	void	*data;
	size_t	datasz;

	ASSERT(MUTEX_HELD(&lsp->ls_crypto_lock));

	if (lsp == NULL)
		return (CRYPTO_DEVICE_ERROR);

	/* lsp->ls_mech.cm_param{_len} has already been set for static iv */
	if (lsp->ls_iv_type == IVM_NONE) {
		return (CRYPTO_SUCCESS);
	}

	/*
	 * if kmem already alloced from previous call and it's the same size
	 * we need now, just recycle it; allocate new kmem only if we have to
	 */
	if (lsp->ls_mech.cm_param == NULL ||
	    lsp->ls_mech.cm_param_len != lsp->ls_iv_len) {
		iv_len = lsp->ls_iv_len;
		iv = kmem_zalloc(iv_len, KM_SLEEP);
	} else {
		iv_len = lsp->ls_mech.cm_param_len;
		iv = lsp->ls_mech.cm_param;
		bzero(iv, iv_len);
	}

	switch (lsp->ls_iv_type) {
	case IVM_ENC_BLKNO:
		/* iv is not static, lblkno changes each time */
		data = &lblkno;
		datasz = sizeof (lblkno);
		break;
	default:
		data = 0;
		datasz = 0;
		break;
	}

	/*
	 * write blkno into the iv buffer padded on the left in case
	 * blkno ever grows bigger than its current longlong_t size
	 * or a variation other than blkno is used for the iv data
	 */
	min = MIN(datasz, iv_len);
	bcopy(data, iv + (iv_len - min), min);

	/* encrypt the data in-place to get the IV */
	SETUP_C_DATA(cdata, iv, iv_len);

	ret = crypto_encrypt(&lsp->ls_iv_mech, &cdata, &lsp->ls_key,
	    NULL, NULL, NULL);
	if (ret != CRYPTO_SUCCESS) {
		cmn_err(CE_WARN, "failed to create iv for block %lld: (0x%x)",
		    lblkno, ret);
		if (lsp->ls_mech.cm_param != iv)
			kmem_free(iv, iv_len);

		return (ret);
	}

	/* clean up the iv from the last computation */
	if (lsp->ls_mech.cm_param != NULL && lsp->ls_mech.cm_param != iv)
		kmem_free(lsp->ls_mech.cm_param, lsp->ls_mech.cm_param_len);

	lsp->ls_mech.cm_param_len = iv_len;
	lsp->ls_mech.cm_param = iv;

	return (CRYPTO_SUCCESS);
}
/*
 * Performs encryption and decryption of a chunk of data of size "len",
 * one DEV_BSIZE block at a time. "len" is assumed to be a multiple of
 * DEV_BSIZE.
 */
static int
lofi_crypto(struct lofi_state *lsp, struct buf *bp, caddr_t plaintext,
    caddr_t ciphertext, size_t len, boolean_t op_encrypt)
{
	crypto_data_t cdata;
	crypto_data_t wdata;
	int ret;
	longlong_t lblkno = bp->b_lblkno;

	mutex_enter(&lsp->ls_crypto_lock);

	/*
	 * though we could encrypt/decrypt entire "len" chunk of data, we need
	 * to break it into DEV_BSIZE pieces to capture blkno incrementing
	 */
	SETUP_C_DATA(cdata, plaintext, len);
	cdata.cd_length = DEV_BSIZE;
	if (ciphertext != NULL) {		/* not in-place crypto */
		SETUP_C_DATA(wdata, ciphertext, len);
		wdata.cd_length = DEV_BSIZE;
	}

	do {
		ret = lofi_blk_mech(lsp, lblkno);
		if (ret != CRYPTO_SUCCESS)
			continue;

		if (op_encrypt) {
			ret = crypto_encrypt(&lsp->ls_mech, &cdata,
			    &lsp->ls_key, NULL,
			    ((ciphertext != NULL) ? &wdata : NULL), NULL);
		} else {
			ret = crypto_decrypt(&lsp->ls_mech, &cdata,
			    &lsp->ls_key, NULL,
			    ((ciphertext != NULL) ? &wdata : NULL), NULL);
		}

		cdata.cd_offset += DEV_BSIZE;
		if (ciphertext != NULL)
			wdata.cd_offset += DEV_BSIZE;
		lblkno++;
	} while (ret == CRYPTO_SUCCESS && cdata.cd_offset < len);

	mutex_exit(&lsp->ls_crypto_lock);

	if (ret != CRYPTO_SUCCESS) {
		cmn_err(CE_WARN, "%s failed for block %lld: (0x%x)",
		    op_encrypt ? "crypto_encrypt()" : "crypto_decrypt()",
		    lblkno, ret);
	}

	return (ret);
}
static int
lofi_rdwr(caddr_t bufaddr, offset_t offset, struct buf *bp,
    struct lofi_state *lsp, size_t len, int method, caddr_t bcopy_locn)
{
	ssize_t resid;
	int isread;
	int error;

	/*
	 * Handles reads/writes for both plain and encrypted lofi
	 * Note: offset is already shifted by lsp->ls_crypto_offset
	 * when it gets here.
	 */

	isread = bp->b_flags & B_READ;
	if (isread) {
		if (method == RDWR_BCOPY) {
			/* DO NOT update bp->b_resid for bcopy */
			bcopy(bcopy_locn, bufaddr, len);
			error = 0;
		} else {		/* RDWR_RAW */
			error = vn_rdwr(UIO_READ, lsp->ls_vp, bufaddr, len,
			    offset, UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred,
			    &resid);
			bp->b_resid = resid;
		}
		if (lsp->ls_crypto_enabled && error == 0) {
			if (lofi_crypto(lsp, bp, bufaddr, NULL, len,
			    B_FALSE) != CRYPTO_SUCCESS) {
				/*
				 * XXX: original code didn't set residual
				 * back to len because no error was expected
				 * from bcopy() if encryption is not enabled
				 */
				if (method != RDWR_BCOPY)
					bp->b_resid = len;
				error = EIO;
			}
		}
		return (error);
	} else {
		void *iobuf = bufaddr;

		if (lsp->ls_crypto_enabled) {
			/* don't do in-place crypto to keep bufaddr intact */
			iobuf = kmem_alloc(len, KM_SLEEP);
			if (lofi_crypto(lsp, bp, bufaddr, iobuf, len,
			    B_TRUE) != CRYPTO_SUCCESS) {
				kmem_free(iobuf, len);
				if (method != RDWR_BCOPY)
					bp->b_resid = len;
				return (EIO);
			}
		}
		if (method == RDWR_BCOPY) {
			/* DO NOT update bp->b_resid for bcopy */
			bcopy(iobuf, bcopy_locn, len);
			error = 0;
		} else {		/* RDWR_RAW */
			error = vn_rdwr(UIO_WRITE, lsp->ls_vp, iobuf, len,
			    offset, UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred,
			    &resid);
			bp->b_resid = resid;
		}
		if (lsp->ls_crypto_enabled) {
			kmem_free(iobuf, len);
		}
		return (error);
	}
}
static int
lofi_mapped_rdwr(caddr_t bufaddr, offset_t offset, struct buf *bp,
    struct lofi_state *lsp)
{
	int error;
	offset_t alignedoffset, mapoffset;
	size_t	xfersize;
	int	isread;
	int	smflags;
	caddr_t	mapaddr;
	size_t	len;
	enum seg_rw srw;
	int	save_error;

	/*
	 * Note: offset is already shifted by lsp->ls_crypto_offset
	 * when it gets here.
	 */
	if (lsp->ls_crypto_enabled)
		ASSERT(lsp->ls_vp_comp_size == lsp->ls_vp_size);

	/*
	 * segmap always gives us an 8K (MAXBSIZE) chunk, aligned on
	 * an 8K boundary, but the buf transfer address may not be
	 * aligned on more than a 512-byte boundary (we don't enforce
	 * that even though we could). This matters since the initial
	 * part of the transfer may not start at offset 0 within the
	 * segmap'd chunk. So we have to compensate for that with
	 * 'mapoffset'. Subsequent chunks always start off at the
	 * beginning, and the last is capped by b_resid.
	 *
	 * Visually, where "|" represents page map boundaries:
	 *   alignedoffset (mapaddr begins at this segmap boundary)
	 *    |   offset (from beginning of file)
	 *    |    |       len
	 *    v    v        v
	 * ===|====X========|====...======|========X====|====
	 *        /-------------...---------------/
	 *         ^ bp->b_bcount/bp->b_resid at start
	 *        /----/--------/----...------/--------/
	 *         |    |       |            nth xfersize (<= MAXBSIZE)
	 *         |    |       2nd thru n-1st xfersize (= MAXBSIZE)
	 *         |    1st xfersize (<= MAXBSIZE)
	 *    mapoffset (offset into 1st segmap, non-0 1st time, 0 thereafter)
	 *
	 * Notes: "alignedoffset" is "offset" rounded down to nearest
	 * MAXBSIZE boundary. "len" is next page boundary of size
	 * PAGESIZE after "alignedoffset".
	 */
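	/*
	 * Worked example (sketch, assuming MAXBSIZE == 8192): for
	 * offset == 0x2300, mapoffset = offset & MAXBOFFSET = 0x300 and
	 * alignedoffset = 0x2000, so the first copy into the segmap
	 * window starts 0x300 bytes in and moves at most
	 * MAXBSIZE - 0x300 bytes.
	 */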
	mapoffset = offset & MAXBOFFSET;
	alignedoffset = offset - mapoffset;
	bp->b_resid = bp->b_bcount;
	isread = bp->b_flags & B_READ;
	srw = isread ? S_READ : S_WRITE;
	do {
		xfersize = MIN(lsp->ls_vp_comp_size - offset,
		    MIN(MAXBSIZE - mapoffset, bp->b_resid));
		len = roundup(mapoffset + xfersize, PAGESIZE);
		mapaddr = segmap_getmapflt(segkmap, lsp->ls_vp,
		    alignedoffset, MAXBSIZE, 1, srw);
		/*
		 * Now fault in the pages. This lets us check
		 * for errors before we reference mapaddr and
		 * try to resolve the fault in bcopy (which would
		 * panic instead). And this can easily happen,
		 * particularly if you've lofi'd a file over NFS
		 * and someone deletes the file on the server.
		 */
		error = segmap_fault(kas.a_hat, segkmap, mapaddr,
		    len, F_SOFTLOCK, srw);
		if (error) {
			(void) segmap_release(segkmap, mapaddr, 0);
			if (FC_CODE(error) == FC_OBJERR)
				error = FC_ERRNO(error);
			else
				error = EIO;
			break;
		}
		/* error may be non-zero for encrypted lofi */
		error = lofi_rdwr(bufaddr, 0, bp, lsp, xfersize,
		    RDWR_BCOPY, mapaddr + mapoffset);
		if (error == 0) {
			bp->b_resid -= xfersize;
			bufaddr += xfersize;
			offset += xfersize;
		}
		smflags = 0;
		if (isread) {
			/*
			 * If we're reading an entire page starting
			 * at a page boundary, there's a good chance
			 * we won't need it again. Put it on the
			 * head of the freelist.
			 */
			if (mapoffset == 0 && xfersize == MAXBSIZE)
				smflags |= SM_DONTNEED;
		} else {
			/*
			 * Write back good pages, it is okay to
			 * always release asynchronous here as we'll
			 * follow with VOP_FSYNC for B_SYNC buffers.
			 */
			if (error == 0)
				smflags |= SM_WRITE | SM_ASYNC;
		}
		(void) segmap_fault(kas.a_hat, segkmap, mapaddr,
		    len, F_SOFTUNLOCK, srw);
		save_error = segmap_release(segkmap, mapaddr, smflags);
		if (error == 0)
			error = save_error;
		/* only the first map may start partial */
		mapoffset = 0;
		alignedoffset += MAXBSIZE;
	} while ((error == 0) && (bp->b_resid > 0) &&
	    (offset < lsp->ls_vp_comp_size));

	return (error);
}
/*
 * Check if segment seg_index is present in the decompressed segment
 * data cache.
 *
 * Returns a pointer to the decompressed segment data cache entry if
 * found, and NULL when decompressed data for this segment is not yet
 * cached.
 */
static struct lofi_comp_cache *
lofi_find_comp_data(struct lofi_state *lsp, uint64_t seg_index)
{
	struct lofi_comp_cache *lc;

	ASSERT(MUTEX_HELD(&lsp->ls_comp_cache_lock));

	for (lc = list_head(&lsp->ls_comp_cache); lc != NULL;
	    lc = list_next(&lsp->ls_comp_cache, lc)) {
		if (lc->lc_index == seg_index) {
			/*
			 * Decompressed segment data was found in the
			 * cache.
			 *
			 * The cache uses an LRU replacement strategy;
			 * move the entry to head of list.
			 */
			list_remove(&lsp->ls_comp_cache, lc);
			list_insert_head(&lsp->ls_comp_cache, lc);
			return (lc);
		}
	}
	return (NULL);
}
/*
 * Add the data for a decompressed segment at segment index
 * seg_index to the cache of the decompressed segments.
 *
 * Returns a pointer to the cache element structure in case
 * the data was added to the cache; returns NULL when the data
 * was not cached.
 */
static struct lofi_comp_cache *
lofi_add_comp_data(struct lofi_state *lsp, uint64_t seg_index,
    uchar_t *data)
{
	struct lofi_comp_cache *lc;

	ASSERT(MUTEX_HELD(&lsp->ls_comp_cache_lock));

	while (lsp->ls_comp_cache_count > lofi_max_comp_cache) {
		lc = list_remove_tail(&lsp->ls_comp_cache);
		ASSERT(lc != NULL);
		kmem_free(lc->lc_data, lsp->ls_uncomp_seg_sz);
		kmem_free(lc, sizeof (struct lofi_comp_cache));
		lsp->ls_comp_cache_count--;
	}

	/*
	 * Do not cache when disabled by tunable variable
	 */
	if (lofi_max_comp_cache == 0)
		return (NULL);

	/*
	 * When the cache has not yet reached the maximum allowed
	 * number of segments, allocate a new cache element.
	 * Otherwise the cache is full; reuse the last list element
	 * (LRU) for caching the decompressed segment data.
	 *
	 * The cache element for the new decompressed segment data is
	 * added to the head of the list.
	 */
	if (lsp->ls_comp_cache_count < lofi_max_comp_cache) {
		lc = kmem_alloc(sizeof (struct lofi_comp_cache), KM_SLEEP);
		lc->lc_data = NULL;
		list_insert_head(&lsp->ls_comp_cache, lc);
		lsp->ls_comp_cache_count++;
	} else {
		lc = list_remove_tail(&lsp->ls_comp_cache);
		if (lc == NULL)
			return (NULL);
		list_insert_head(&lsp->ls_comp_cache, lc);
	}

	/*
	 * Free old uncompressed segment data when reusing a cache
	 * entry.
	 */
	if (lc->lc_data != NULL)
		kmem_free(lc->lc_data, lsp->ls_uncomp_seg_sz);

	lc->lc_data = data;
	lc->lc_index = seg_index;
	return (lc);
}
/*ARGSUSED*/
static int
gzip_decompress(void *src, size_t srclen, void *dst,
    size_t *dstlen, int level)
{
	ASSERT(*dstlen >= srclen);

	if (z_uncompress(dst, dstlen, src, srclen) != Z_OK)
		return (-1);
	return (0);
}
#define	LZMA_HEADER_SIZE	(LZMA_PROPS_SIZE + 8)
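/*
 * Layout note: an LZMA stream starts with LZMA_PROPS_SIZE (5) bytes of
 * encoder properties followed by an 8-byte uncompressed-size field, so
 * each compressed segment carries a 13-byte header ahead of the payload
 * that LzmaDecode() consumes in lzma_decompress() below.
 */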
/*ARGSUSED*/
static int
lzma_decompress(void *src, size_t srclen, void *dst,
    size_t *dstlen, int level)
{
	size_t insizepure;
	void *actual_src;
	ELzmaStatus status;

	insizepure = srclen - LZMA_HEADER_SIZE;
	actual_src = (void *)((Byte *)src + LZMA_HEADER_SIZE);

	if (LzmaDecode((Byte *)dst, (size_t *)dstlen,
	    (const Byte *)actual_src, &insizepure,
	    (const Byte *)src, LZMA_PROPS_SIZE, LZMA_FINISH_ANY, &status,
	    &g_Alloc) != SZ_OK) {
		return (-1);
	}
	return (0);
}
/*
 * This is basically what strategy used to be before we found we
 * needed task queues.
 */
static void
lofi_strategy_task(void *arg)
{
	struct buf *bp = (struct buf *)arg;
	int error;
	int syncflag = 0;
	struct lofi_state *lsp;
	offset_t offset;
	caddr_t	bufaddr;
	size_t	len;
	size_t	xfersize;
	boolean_t bufinited = B_FALSE;

	lsp = ddi_get_soft_state(lofi_statep,
	    LOFI_MINOR2ID(getminor(bp->b_edev)));

	if (lsp->ls_kstat) {
		mutex_enter(lsp->ls_kstat->ks_lock);
		kstat_waitq_to_runq(KSTAT_IO_PTR(lsp->ls_kstat));
		mutex_exit(lsp->ls_kstat->ks_lock);
	}

	mutex_enter(&lsp->ls_vp_lock);
	lsp->ls_vp_iocount++;
	mutex_exit(&lsp->ls_vp_lock);

	bufaddr = bp->b_un.b_addr;
	offset = (bp->b_lblkno + (diskaddr_t)(uintptr_t)bp->b_private)
	    << lsp->ls_lbshift;	/* offset within file */
	if (lsp->ls_crypto_enabled) {
		/* encrypted data really begins after crypto header */
		offset += lsp->ls_crypto_offset;
	}
	len = bp->b_bcount;
	bufinited = B_TRUE;

	if (lsp->ls_vp == NULL || lsp->ls_vp_closereq) {
		error = EIO;
		goto errout;
	}

	/*
	 * If we're writing and the buffer was not B_ASYNC
	 * we'll follow up with a VOP_FSYNC() to force any
	 * asynchronous I/O to stable storage.
	 */
	if (!(bp->b_flags & B_READ) && !(bp->b_flags & B_ASYNC))
		syncflag = FSYNC;

	/*
	 * We used to always use vn_rdwr here, but we cannot do that because
	 * we might decide to read or write from the the underlying
	 * file during this call, which would be a deadlock because
	 * we have the rw_lock. So instead we page, unless it's not
	 * mapable or it's a character device or it's an encrypted lofi.
	 */
	if ((lsp->ls_vp->v_flag & VNOMAP) || (lsp->ls_vp->v_type == VCHR) ||
	    lsp->ls_crypto_enabled) {
		error = lofi_rdwr(bufaddr, offset, bp, lsp, len, RDWR_RAW,
		    NULL);
	} else if (lsp->ls_uncomp_seg_sz == 0) {
		error = lofi_mapped_rdwr(bufaddr, offset, bp, lsp);
	} else {
		uchar_t *compressed_seg = NULL, *cmpbuf;
		uchar_t *uncompressed_seg = NULL;
		uchar_t *useg;
		lofi_compress_info_t *li;
		size_t oblkcount;
		size_t seglen;
		uint64_t sblkno, eblkno, cmpbytes;
		uint64_t uncompressed_seg_index;
		struct lofi_comp_cache *lc;
		offset_t sblkoff, eblkoff;
		u_offset_t salign, ealign;
		u_offset_t sdiff;
		uint32_t comp_data_sz;
		uint64_t i;
		int j;

		/*
		 * From here on we're dealing primarily with compressed files
		 */
		ASSERT(!lsp->ls_crypto_enabled);

		/*
		 * Compressed files can only be read from and
		 * not written to
		 */
		if (!(bp->b_flags & B_READ)) {
			bp->b_resid = bp->b_bcount;
			error = EROFS;
			goto done;
		}

		ASSERT(lsp->ls_comp_algorithm_index >= 0);
		li = &lofi_compress_table[lsp->ls_comp_algorithm_index];
		/*
		 * Compute starting and ending compressed segment numbers
		 * We use only bitwise operations avoiding division and
		 * modulus because we enforce the compression segment size
		 * to a power of 2
		 */
		sblkno = offset >> lsp->ls_comp_seg_shift;
		sblkoff = offset & (lsp->ls_uncomp_seg_sz - 1);
		eblkno = (offset + bp->b_bcount) >> lsp->ls_comp_seg_shift;
		eblkoff = (offset + bp->b_bcount) &
		    (lsp->ls_uncomp_seg_sz - 1);

		/*
		 * Check the decompressed segment cache.
		 *
		 * The cache is used only when the requested data
		 * is within a segment. Requests that cross
		 * segment boundaries bypass the cache.
		 */
		if (sblkno == eblkno ||
		    (sblkno + 1 == eblkno && eblkoff == 0)) {
			/*
			 * Request doesn't cross a segment boundary,
			 * now check the cache.
			 */
			mutex_enter(&lsp->ls_comp_cache_lock);
			lc = lofi_find_comp_data(lsp, sblkno);
			if (lc != NULL) {
				/*
				 * We've found the decompressed segment
				 * data in the cache; reuse it.
				 */
				bcopy(lc->lc_data + sblkoff, bufaddr,
				    bp->b_bcount);
				mutex_exit(&lsp->ls_comp_cache_lock);
				bp->b_resid = 0;
				error = 0;
				goto done;
			}
			mutex_exit(&lsp->ls_comp_cache_lock);
		}

		/*
		 * Align start offset to block boundary for segmap
		 */
		salign = lsp->ls_comp_seg_index[sblkno];
		sdiff = salign & (DEV_BSIZE - 1);
		salign -= sdiff;
		if (eblkno >= (lsp->ls_comp_index_sz - 1)) {
			/*
			 * We're dealing with the last segment of
			 * the compressed file -- the size of this
			 * segment *may not* be the same as the
			 * segment size for the file
			 */
			eblkoff = (offset + bp->b_bcount) &
			    (lsp->ls_uncomp_last_seg_sz - 1);
			ealign = lsp->ls_vp_comp_size;
		} else {
			ealign = lsp->ls_comp_seg_index[eblkno + 1];
		}

		/*
		 * Preserve original request paramaters
		 */
		oblkcount = bp->b_bcount;

		/*
		 * Assign the calculated parameters
		 */
		comp_data_sz = ealign - salign;
		bp->b_bcount = comp_data_sz;

		/*
		 * Buffers to hold compressed segments are pre-allocated
		 * on a per-thread basis. Find a pre-allocated buffer
		 * that is not currently in use and mark it for use.
		 */
		mutex_enter(&lsp->ls_comp_bufs_lock);
		for (j = 0; j < lofi_taskq_nthreads; j++) {
			if (lsp->ls_comp_bufs[j].inuse == 0) {
				lsp->ls_comp_bufs[j].inuse = 1;
				break;
			}
		}
		mutex_exit(&lsp->ls_comp_bufs_lock);
		ASSERT(j < lofi_taskq_nthreads);

		/*
		 * If the pre-allocated buffer size does not match
		 * the size of the I/O request, re-allocate it with
		 * the appropriate size
		 */
		if (lsp->ls_comp_bufs[j].bufsize < bp->b_bcount) {
			if (lsp->ls_comp_bufs[j].bufsize > 0)
				kmem_free(lsp->ls_comp_bufs[j].buf,
				    lsp->ls_comp_bufs[j].bufsize);
			lsp->ls_comp_bufs[j].buf = kmem_alloc(bp->b_bcount,
			    KM_SLEEP);
			lsp->ls_comp_bufs[j].bufsize = bp->b_bcount;
		}
		compressed_seg = lsp->ls_comp_bufs[j].buf;

		/*
		 * Map in the calculated number of blocks
		 */
		error = lofi_mapped_rdwr((caddr_t)compressed_seg, salign,
		    bp, lsp);

		bp->b_bcount = oblkcount;
		bp->b_resid = oblkcount;
		if (error != 0)
			goto done;

		/*
		 * decompress compressed blocks start
		 */
		cmpbuf = compressed_seg + sdiff;
		for (i = sblkno; i <= eblkno; i++) {
			ASSERT(i < lsp->ls_comp_index_sz - 1);

			/*
			 * The last segment is special in that it is
			 * most likely not going to be the same
			 * (uncompressed) size as the other segments.
			 */
			if (i == (lsp->ls_comp_index_sz - 2)) {
				seglen = lsp->ls_uncomp_last_seg_sz;
			} else {
				seglen = lsp->ls_uncomp_seg_sz;
			}

			/*
			 * Each of the segment index entries contains
			 * the starting block number for that segment.
			 * The number of compressed bytes in a segment
			 * is thus the difference between the starting
			 * block number of this segment and the starting
			 * block number of the next segment.
			 */
			cmpbytes = lsp->ls_comp_seg_index[i + 1] -
			    lsp->ls_comp_seg_index[i];

			/*
			 * The first byte in a compressed segment is a flag
			 * that indicates whether this segment is compressed
			 * at all.
			 *
			 * The variable 'useg' is used (instead of
			 * uncompressed_seg) in this loop to keep a
			 * reference to the uncompressed segment.
			 *
			 * N.B. If 'useg' is replaced with uncompressed_seg,
			 * it leads to memory leaks and heap corruption in
			 * corner cases where compressed segments lie
			 * adjacent to uncompressed segments.
			 */
			if (*cmpbuf == UNCOMPRESSED) {
				useg = cmpbuf + SEGHDR;
			} else {
				if (uncompressed_seg == NULL)
					uncompressed_seg =
					    kmem_alloc(lsp->ls_uncomp_seg_sz,
					    KM_SLEEP);
				useg = uncompressed_seg;
				uncompressed_seg_index = i;

				if (li->l_decompress((cmpbuf + SEGHDR),
				    (cmpbytes - SEGHDR), uncompressed_seg,
				    &seglen, li->l_level) != 0) {
					error = EIO;
					goto done;
				}
			}

			/*
			 * Determine how much uncompressed data we
			 * have to copy and copy it
			 */
			xfersize = lsp->ls_uncomp_seg_sz - sblkoff;
			if (i == eblkno)
				xfersize -= (lsp->ls_uncomp_seg_sz - eblkoff);

			bcopy((useg + sblkoff), bufaddr, xfersize);

			cmpbuf += cmpbytes;
			bufaddr += xfersize;
			bp->b_resid -= xfersize;
			sblkoff = 0;

			if (bp->b_resid == 0)
				break;
		} /* decompress compressed blocks ends */

		/*
		 * Skip to done if there is no uncompressed data to cache
		 */
		if (uncompressed_seg == NULL)
			goto done;

		/*
		 * Add the data for the last decompressed segment to
		 * the cache.
		 *
		 * In case the uncompressed segment data was added to (and
		 * is referenced by) the cache, make sure we don't free it
		 * here.
		 */
		mutex_enter(&lsp->ls_comp_cache_lock);
		if ((lc = lofi_add_comp_data(lsp, uncompressed_seg_index,
		    uncompressed_seg)) != NULL) {
			uncompressed_seg = NULL;
		}
		mutex_exit(&lsp->ls_comp_cache_lock);

done:
		if (compressed_seg != NULL) {
			mutex_enter(&lsp->ls_comp_bufs_lock);
			lsp->ls_comp_bufs[j].inuse = 0;
			mutex_exit(&lsp->ls_comp_bufs_lock);
		}
		if (uncompressed_seg != NULL)
			kmem_free(uncompressed_seg, lsp->ls_uncomp_seg_sz);
	} /* end of handling compressed files */

	if ((error == 0) && (syncflag != 0))
		error = VOP_FSYNC(lsp->ls_vp, syncflag, kcred, NULL);

errout:
	if (bufinited && lsp->ls_kstat) {
		size_t n_done = bp->b_bcount - bp->b_resid;
		kstat_io_t *kioptr;

		mutex_enter(lsp->ls_kstat->ks_lock);
		kioptr = KSTAT_IO_PTR(lsp->ls_kstat);
		if (bp->b_flags & B_READ) {
			kioptr->nread += n_done;
			kioptr->reads++;
		} else {
			kioptr->nwritten += n_done;
			kioptr->writes++;
		}
		kstat_runq_exit(kioptr);
		mutex_exit(lsp->ls_kstat->ks_lock);
	}

	mutex_enter(&lsp->ls_vp_lock);
	if (--lsp->ls_vp_iocount == 0)
		cv_broadcast(&lsp->ls_vp_cv);
	mutex_exit(&lsp->ls_vp_lock);

	bioerror(bp, error);
	biodone(bp);
}
*bp
)
1597 struct lofi_state
*lsp
;
1605 * We cannot just do I/O here, because the current thread
1606 * _might_ end up back in here because the underlying filesystem
1607 * wants a buffer, which eventually gets into bio_recycle and
1608 * might call into lofi to write out a delayed-write buffer.
1609 * This is bad if the filesystem above lofi is the same as below.
1611 * We could come up with a complex strategy using threads to
1612 * do the I/O asynchronously, or we could use task queues. task
1613 * queues were incredibly easy so they win.
1616 lsp
= ddi_get_soft_state(lofi_statep
,
1617 LOFI_MINOR2ID(getminor(bp
->b_edev
)));
1618 part
= LOFI_PART(getminor(bp
->b_edev
));
1621 bioerror(bp
, ENXIO
);
1625 shift
= lsp
->ls_lbshift
;
1628 p_nblks
= lsp
->ls_vp_size
>> shift
;
1630 if (lsp
->ls_cmlbhandle
!= NULL
) {
1631 if (cmlb_partinfo(lsp
->ls_cmlbhandle
, part
, &p_nblks
, &p_lba
,
1633 bioerror(bp
, ENXIO
);
1639 /* start block past partition end? */
1640 if (bp
->b_lblkno
> p_nblks
) {
1641 bioerror(bp
, ENXIO
);
1646 offset
= (bp
->b_lblkno
+p_lba
) << shift
; /* offset within file */
1648 mutex_enter(&lsp
->ls_vp_lock
);
1649 if (lsp
->ls_vp
== NULL
|| lsp
->ls_vp_closereq
) {
1652 mutex_exit(&lsp
->ls_vp_lock
);
1656 if (lsp
->ls_crypto_enabled
) {
1657 /* encrypted data really begins after crypto header */
1658 offset
+= lsp
->ls_crypto_offset
;
1661 /* make sure we will not pass the file or partition size */
1662 if (offset
== lsp
->ls_vp_size
||
1663 offset
== (((p_lba
+ p_nblks
) << shift
) + lsp
->ls_crypto_offset
)) {
1665 if ((bp
->b_flags
& B_READ
) != 0) {
1666 bp
->b_resid
= bp
->b_bcount
;
1669 /* writes should fail */
1670 bioerror(bp
, ENXIO
);
1673 mutex_exit(&lsp
->ls_vp_lock
);
1676 if ((offset
> lsp
->ls_vp_size
) ||
1677 (offset
> (((p_lba
+ p_nblks
) << shift
) + lsp
->ls_crypto_offset
)) ||
1678 ((offset
+ bp
->b_bcount
) > ((p_lba
+ p_nblks
) << shift
))) {
1679 bioerror(bp
, ENXIO
);
1681 mutex_exit(&lsp
->ls_vp_lock
);
1685 mutex_exit(&lsp
->ls_vp_lock
);
1687 if (lsp
->ls_kstat
) {
1688 mutex_enter(lsp
->ls_kstat
->ks_lock
);
1689 kstat_waitq_enter(KSTAT_IO_PTR(lsp
->ls_kstat
));
1690 mutex_exit(lsp
->ls_kstat
->ks_lock
);
1692 bp
->b_private
= (void *)(uintptr_t)p_lba
; /* partition start */
1693 (void) taskq_dispatch(lsp
->ls_taskq
, lofi_strategy_task
, bp
, KM_SLEEP
);
1699 lofi_read(dev_t dev
, struct uio
*uio
, struct cred
*credp
)
1701 if (getminor(dev
) == 0)
1704 return (physio(lofi_strategy
, NULL
, dev
, B_READ
, minphys
, uio
));
1709 lofi_write(dev_t dev
, struct uio
*uio
, struct cred
*credp
)
1711 if (getminor(dev
) == 0)
1714 return (physio(lofi_strategy
, NULL
, dev
, B_WRITE
, minphys
, uio
));
1719 lofi_aread(dev_t dev
, struct aio_req
*aio
, struct cred
*credp
)
1721 if (getminor(dev
) == 0)
1723 UIO_CHECK(aio
->aio_uio
);
1724 return (aphysio(lofi_strategy
, anocancel
, dev
, B_READ
, minphys
, aio
));
1729 lofi_awrite(dev_t dev
, struct aio_req
*aio
, struct cred
*credp
)
1731 if (getminor(dev
) == 0)
1733 UIO_CHECK(aio
->aio_uio
);
1734 return (aphysio(lofi_strategy
, anocancel
, dev
, B_WRITE
, minphys
, aio
));
/*ARGSUSED*/
static int
lofi_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
{
	struct lofi_state *lsp;
	dev_t dev = (dev_t)arg;
	int instance;

	instance = LOFI_MINOR2ID(getminor(dev));
	switch (infocmd) {
	case DDI_INFO_DEVT2DEVINFO:
		lsp = ddi_get_soft_state(lofi_statep, instance);
		if (lsp == NULL)
			return (DDI_FAILURE);
		*result = lsp->ls_dip;
		return (DDI_SUCCESS);
	case DDI_INFO_DEVT2INSTANCE:
		*result = (void *) (intptr_t)instance;
		return (DDI_SUCCESS);
	}
	return (DDI_FAILURE);
}
static int
lofi_create_minor_nodes(struct lofi_state *lsp, boolean_t labeled)
{
	int error = 0;
	int instance = ddi_get_instance(lsp->ls_dip);

	if (labeled == B_TRUE) {
		cmlb_alloc_handle(&lsp->ls_cmlbhandle);
		error = cmlb_attach(lsp->ls_dip, &lofi_tg_ops, DTYPE_DIRECT,
		    B_FALSE, B_FALSE, DDI_NT_BLOCK_CHAN,
		    CMLB_CREATE_P0_MINOR_NODE, lsp->ls_cmlbhandle, (void *)1);

		if (error != DDI_SUCCESS) {
			cmlb_free_handle(&lsp->ls_cmlbhandle);
			lsp->ls_cmlbhandle = NULL;
			error = ENXIO;
		}
	} else {
		/* create minor nodes */
		error = ddi_create_minor_node(lsp->ls_dip, LOFI_BLOCK_NODE,
		    S_IFBLK, LOFI_ID2MINOR(instance), DDI_PSEUDO, 0);
		if (error == DDI_SUCCESS) {
			error = ddi_create_minor_node(lsp->ls_dip,
			    LOFI_CHAR_NODE, S_IFCHR, LOFI_ID2MINOR(instance),
			    DDI_PSEUDO, 0);
			if (error != DDI_SUCCESS) {
				ddi_remove_minor_node(lsp->ls_dip,
				    LOFI_BLOCK_NODE);
				error = ENXIO;
			}
		} else
			error = ENXIO;
	}
	return (error);
}
static int
lofi_zone_bind(struct lofi_state *lsp)
{
	int error = 0;

	mutex_enter(&curproc->p_lock);
	if ((error = rctl_incr_lofi(curproc, curproc->p_zone, 1)) != 0) {
		mutex_exit(&curproc->p_lock);
		return (error);
	}
	mutex_exit(&curproc->p_lock);

	if (ddi_prop_update_string(lsp->ls_dev, lsp->ls_dip, ZONE_PROP_NAME,
	    (char *)curproc->p_zone->zone_name) != DDI_PROP_SUCCESS) {
		rctl_decr_lofi(curproc->p_zone, 1);
		error = EINVAL;
	} else {
		zone_init_ref(&lsp->ls_zone);
		zone_hold_ref(curzone, &lsp->ls_zone, ZONE_REF_LOFI);
	}
	return (error);
}
static void
lofi_zone_unbind(struct lofi_state *lsp)
{
	(void) ddi_prop_remove(DDI_DEV_T_NONE, lsp->ls_dip, ZONE_PROP_NAME);
	rctl_decr_lofi(curproc->p_zone, 1);
	zone_rele_ref(&lsp->ls_zone, ZONE_REF_LOFI);
}
static int
lofi_online_dev(dev_info_t *dip)
{
	boolean_t labeled;
	int	error;
	int	instance = ddi_get_instance(dip);
	struct lofi_state *lsp;

	labeled = B_FALSE;
	if (ddi_prop_exists(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, "labeled"))
		labeled = B_TRUE;

	/* lsp alloc+init, soft state is freed in lofi_detach */
	error = ddi_soft_state_zalloc(lofi_statep, instance);
	if (error == DDI_FAILURE) {
		return (ENOMEM);
	}

	lsp = ddi_get_soft_state(lofi_statep, instance);
	lsp->ls_dip = dip;

	if ((error = lofi_zone_bind(lsp)) != 0)
		goto err;

	cv_init(&lsp->ls_vp_cv, NULL, CV_DRIVER, NULL);
	mutex_init(&lsp->ls_comp_cache_lock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&lsp->ls_comp_bufs_lock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&lsp->ls_kstat_lock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&lsp->ls_vp_lock, NULL, MUTEX_DRIVER, NULL);

	if ((error = lofi_create_minor_nodes(lsp, labeled)) != 0) {
		lofi_zone_unbind(lsp);
		goto lerr;
	}

	/* driver handles kernel-issued IOCTLs */
	if (ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP,
	    DDI_KERNEL_IOCTL, NULL, 0) != DDI_PROP_SUCCESS) {
		error = DDI_FAILURE;
		goto merr;
	}

	lsp->ls_kstat = kstat_create_zone(LOFI_DRIVER_NAME, instance,
	    NULL, "disk", KSTAT_TYPE_IO, 1, 0, getzoneid());
	if (lsp->ls_kstat == NULL) {
		(void) ddi_prop_remove(DDI_DEV_T_NONE, lsp->ls_dip,
		    DDI_KERNEL_IOCTL);
		error = ENOMEM;
		goto merr;
	}

	lsp->ls_kstat->ks_lock = &lsp->ls_kstat_lock;
	kstat_zone_add(lsp->ls_kstat, GLOBAL_ZONEID);
	kstat_install(lsp->ls_kstat);
	return (DDI_SUCCESS);

merr:
	if (lsp->ls_cmlbhandle != NULL) {
		cmlb_detach(lsp->ls_cmlbhandle, 0);
		cmlb_free_handle(&lsp->ls_cmlbhandle);
	}
	ddi_remove_minor_node(dip, NULL);
	lofi_zone_unbind(lsp);
lerr:
	mutex_destroy(&lsp->ls_comp_cache_lock);
	mutex_destroy(&lsp->ls_comp_bufs_lock);
	mutex_destroy(&lsp->ls_kstat_lock);
	mutex_destroy(&lsp->ls_vp_lock);
	cv_destroy(&lsp->ls_vp_cv);
err:
	ddi_soft_state_free(lofi_statep, instance);
	return (error);
}
static int
lofi_dev_callback(sysevent_t *ev, void *cookie)
{
	nvlist_t *nvlist;
	char *class, *driver;
	char name[10];
	int32_t instance;

	class = sysevent_get_class_name(ev);
	if (strcmp(class, EC_DEV_ADD) && strcmp(class, EC_DEV_REMOVE))
		return (0);

	(void) sysevent_get_attr_list(ev, &nvlist);
	driver = fnvlist_lookup_string(nvlist, DEV_DRIVER_NAME);
	instance = fnvlist_lookup_int32(nvlist, DEV_INSTANCE);

	if (strcmp(driver, LOFI_DRIVER_NAME) != 0) {
		fnvlist_free(nvlist);
		return (0);
	}

	/*
	 * insert or remove device info, then announce the change
	 * via cv_broadcast.
	 * This allows the MAP/UNMAP to monitor device change.
	 */
	(void) snprintf(name, sizeof (name), "%d", instance);
	if (strcmp(class, EC_DEV_ADD) == 0) {
		mutex_enter(&lofi_chan_lock);
		fnvlist_add_nvlist(lofi_devlink_cache, name, nvlist);
		cv_broadcast(&lofi_chan_cv);
		mutex_exit(&lofi_chan_lock);
	} else if (strcmp(class, EC_DEV_REMOVE) == 0) {
		mutex_enter(&lofi_chan_lock);
		/* Can not use fnvlist_remove() as we can get ENOENT. */
		(void) nvlist_remove_all(lofi_devlink_cache, name);
		cv_broadcast(&lofi_chan_cv);
		mutex_exit(&lofi_chan_lock);
	}

	fnvlist_free(nvlist);
	return (0);
}
static int
lofi_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	int	rv;
	int	instance = ddi_get_instance(dip);
	struct lofi_state *lsp;

	if (cmd != DDI_ATTACH)
		return (DDI_FAILURE);

	/*
	 * Instance 0 is control instance, attaching control instance
	 * will set the lofi up and ready.
	 */
	if (instance == 0) {
		rv = ddi_soft_state_zalloc(lofi_statep, 0);
		if (rv == DDI_FAILURE) {
			return (DDI_FAILURE);
		}
		lsp = ddi_get_soft_state(lofi_statep, instance);
		rv = ddi_create_minor_node(dip, LOFI_CTL_NODE, S_IFCHR, 0,
		    DDI_PSEUDO, 0);
		if (rv == DDI_FAILURE) {
			ddi_soft_state_free(lofi_statep, 0);
			return (DDI_FAILURE);
		}
		/* driver handles kernel-issued IOCTLs */
		if (ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP,
		    DDI_KERNEL_IOCTL, NULL, 0) != DDI_PROP_SUCCESS) {
			ddi_remove_minor_node(dip, NULL);
			ddi_soft_state_free(lofi_statep, 0);
			return (DDI_FAILURE);
		}

		rv = sysevent_evc_bind(DEVFS_CHANNEL, &lofi_chan,
		    EVCH_CREAT | EVCH_HOLD_PEND);
		if (rv == 0) {
			rv = sysevent_evc_subscribe(lofi_chan, "lofi",
			    EC_ALL, lofi_dev_callback, NULL, 0);
			rv |= sysevent_evc_subscribe(lofi_chan, "disk",
			    EC_ALL, lofi_dev_callback, NULL, 0);
		}
		if (rv != 0) {
			if (lofi_chan != NULL)
				(void) sysevent_evc_unbind(lofi_chan);
			ddi_prop_remove_all(dip);
			ddi_remove_minor_node(dip, NULL);
			ddi_soft_state_free(lofi_statep, 0);
			return (DDI_FAILURE);
		}
		zone_key_create(&lofi_zone_key, NULL, lofi_zone_shutdown,
		    NULL);

		lsp->ls_dip = dip;
	} else {
		if (lofi_online_dev(dip) == DDI_FAILURE)
			return (DDI_FAILURE);
	}

	ddi_report_dev(dip);
	return (DDI_SUCCESS);
}
static int
lofi_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	struct lofi_state *lsp;
	int instance = ddi_get_instance(dip);

	if (cmd != DDI_DETACH)
		return (DDI_FAILURE);

	/*
	 * If the instance is not 0, release state.
	 * The instance 0 is control device, we can not detach it
	 * before other instances are detached.
	 */
	if (instance != 0) {
		lsp = ddi_get_soft_state(lofi_statep, instance);
		if (lsp != NULL && lsp->ls_vp_ready == B_FALSE) {
			ddi_soft_state_free(lofi_statep, instance);
			return (DDI_SUCCESS);
		}
		return (DDI_FAILURE);
	}
	mutex_enter(&lofi_lock);

	if (!list_is_empty(&lofi_list)) {
		mutex_exit(&lofi_lock);
		return (DDI_FAILURE);
	}

	ddi_remove_minor_node(dip, NULL);
	ddi_prop_remove_all(dip);

	mutex_exit(&lofi_lock);

	(void) sysevent_evc_unbind(lofi_chan);
	if (zone_key_delete(lofi_zone_key) != 0)
		cmn_err(CE_WARN, "failed to delete zone key");

	ddi_soft_state_free(lofi_statep, 0);

	return (DDI_SUCCESS);
}
/*
 * With the addition of encryption, we must be careful that the encryption
 * key is wiped before the kernel's data structures are freed so it cannot
 * accidentally slip out to userland through uninitialized data elsewhere.
 */
static void
free_lofi_ioctl(struct lofi_ioctl *klip)
{
	/* Make sure this encryption key doesn't stick around */
	bzero(klip->li_key, sizeof (klip->li_key));
	kmem_free(klip, sizeof (struct lofi_ioctl));
}
/*
 * These two functions simplify the rest of the ioctls that need to copyin/out
 * the lofi_ioctl structure.
 */
static int
copy_in_lofi_ioctl(const struct lofi_ioctl *ulip, struct lofi_ioctl **klipp,
    int flag)
{
	struct lofi_ioctl *klip;
	int	error;

	klip = *klipp = kmem_alloc(sizeof (struct lofi_ioctl), KM_SLEEP);
	error = ddi_copyin(ulip, klip, sizeof (struct lofi_ioctl), flag);
	if (error)
		goto err;

	/* ensure NULL termination */
	klip->li_filename[MAXPATHLEN-1] = '\0';
	klip->li_devpath[MAXPATHLEN-1] = '\0';
	klip->li_algorithm[MAXALGLEN-1] = '\0';
	klip->li_cipher[CRYPTO_MAX_MECH_NAME-1] = '\0';
	klip->li_iv_cipher[CRYPTO_MAX_MECH_NAME-1] = '\0';

	if (klip->li_id > L_MAXMIN32) {
		error = EINVAL;
		goto err;
	}

	return (0);

err:
	free_lofi_ioctl(klip);
	return (error);
}
*klip
, struct lofi_ioctl
*ulip
,
2106 * NOTE: Do NOT copy the crypto_key_t "back" to userland.
2107 * This ensures that an attacker can't trivially find the
2108 * key for a mapping just by issuing the ioctl.
2110 * It can still be found by poking around in kmem with mdb(1),
2111 * but there is no point in making it easy when the info isn't
2112 * of any use in this direction anyway.
2114 * Either way we don't actually have the raw key stored in
2115 * a form that we can get it anyway, since we just used it
2116 * to create a ctx template and didn't keep "the original".
2118 error
= ddi_copyout(klip
, ulip
, sizeof (struct lofi_ioctl
), flag
);
2125 lofi_access(struct lofi_state
*lsp
)
2127 ASSERT(MUTEX_HELD(&lofi_lock
));
2128 if (INGLOBALZONE(curproc
) || lsp
->ls_zone
.zref_zone
== curzone
)
/*
 * Find the lofi state for the given filename. We compare by vnode to
 * allow the global zone visibility into NGZ lofi nodes.
 */
static int
file_to_lofi_nocheck(char *filename, boolean_t readonly,
    struct lofi_state **lspp)
{
	struct lofi_state *lsp;
	vnode_t *vp = NULL;
	int err = 0;
	int rdfiles = 0;

	ASSERT(MUTEX_HELD(&lofi_lock));

	if ((err = lookupname(filename, UIO_SYSSPACE, FOLLOW,
	    NULLVPP, &vp)) != 0)
		goto out;

	if (vp->v_type == VREG) {
		vnode_t *realvp;

		if (VOP_REALVP(vp, &realvp, NULL) == 0) {
			VN_HOLD(realvp);
			VN_RELE(vp);
			vp = realvp;
		}
	}

	for (lsp = list_head(&lofi_list); lsp != NULL;
	    lsp = list_next(&lofi_list, lsp)) {
		if (lsp->ls_vp == vp) {
			if (lspp != NULL)
				*lspp = lsp;
			if (lsp->ls_readonly) {
				rdfiles++;
				/* Skip if '-r' is specified */
				if (readonly)
					continue;
			}
			err = 0;
			goto out;
		}
	}

	err = ENOENT;

out:
	/*
	 * If a filename is given as an argument for lofi_unmap, we shouldn't
	 * allow unmap if there are multiple read-only lofi devices associated
	 * with this file.
	 */
	if (lsp != NULL) {
		if (rdfiles == 1)
			err = 0;
		else if (rdfiles > 1)
			err = EBUSY;
	}

	if (vp != NULL)
		VN_RELE(vp);
	return (err);
}
2201 file_to_lofi(char *filename
, boolean_t readonly
, struct lofi_state
**lspp
)
2205 ASSERT(MUTEX_HELD(&lofi_lock
));
2207 if ((err
= file_to_lofi_nocheck(filename
, readonly
, lspp
)) != 0)
2210 if ((err
= lofi_access(*lspp
)) != 0)
/*
 * Fakes up a disk geometry based on the size of the file. This is needed
 * to support newfs on a traditional lofi device, but also will provide
 * a geometry hint for cmlb.
 */
static void
fake_disk_geometry(struct lofi_state *lsp)
{
	u_offset_t dsize = lsp->ls_vp_size - lsp->ls_crypto_offset;

	/* dk_geom - see dkio(7I) */
	/*
	 * dkg_ncyl _could_ be set to one here (one big cylinder with gobs
	 * of sectors), but that breaks programs like fdisk which want to
	 * partition a disk by cylinder. With one cylinder, you can't create
	 * an fdisk partition and put pcfs on it for testing (hard to pick
	 * a number between one and one).
	 *
	 * The cheezy floppy test is an attempt to not have too few cylinders
	 * for a small file, or so many on a big file that you waste space
	 * for backup superblocks or cylinder group structures.
	 */
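	/*
	 * Worked example: a 10 MB image is not "floppy"-sized, so
	 * dkg_ncyl = 10485760 / (300 * 1024) = 34 cylinders; a 1 MB image
	 * takes the floppy branch: 1048576 / (100 * 1024) = 10 cylinders.
	 */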
	bzero(&lsp->ls_dkg, sizeof (lsp->ls_dkg));
	if (dsize < (2 * 1024 * 1024)) /* floppy? */
		lsp->ls_dkg.dkg_ncyl = dsize / (100 * 1024);
	else
		lsp->ls_dkg.dkg_ncyl = dsize / (300 * 1024);
	/* in case file is < 100k */
	if (lsp->ls_dkg.dkg_ncyl == 0)
		lsp->ls_dkg.dkg_ncyl = 1;

	lsp->ls_dkg.dkg_pcyl = lsp->ls_dkg.dkg_ncyl;
	lsp->ls_dkg.dkg_nhead = 1;
	lsp->ls_dkg.dkg_rpm = 7200;

	lsp->ls_dkg.dkg_nsect = dsize /
	    (lsp->ls_dkg.dkg_ncyl << lsp->ls_pbshift);
}
/*
 * build vtoc - see dkio(7I)
 *
 * Fakes one big partition based on the size of the file. This is needed
 * because we allow newfs'ing the traditional lofi device and newfs will
 * do several disk ioctls to figure out the geometry and partition information.
 * It uses that information to determine the parameters to pass to mkfs.
 */
static void
fake_disk_vtoc(struct lofi_state *lsp, struct vtoc *vt)
{
	bzero(vt, sizeof (struct vtoc));
	vt->v_sanity = VTOC_SANE;
	vt->v_version = V_VERSION;
	(void) strncpy(vt->v_volume, LOFI_DRIVER_NAME,
	    sizeof (vt->v_volume));
	vt->v_sectorsz = 1 << lsp->ls_pbshift;
	vt->v_nparts = 1;
	vt->v_part[0].p_tag = V_UNASSIGNED;

	/*
	 * A compressed file is read-only, other files can
	 * be read-write
	 */
	if (lsp->ls_uncomp_seg_sz > 0) {
		vt->v_part[0].p_flag = V_UNMNT | V_RONLY;
	} else {
		vt->v_part[0].p_flag = V_UNMNT;
	}
	vt->v_part[0].p_start = (daddr_t)0;
	/*
	 * The partition size cannot just be the number of sectors, because
	 * that might not end on a cylinder boundary. And if that's the case,
	 * newfs/mkfs will print a scary warning. So just figure the size
	 * based on the number of cylinders and sectors/cylinder.
	 */
	vt->v_part[0].p_size = lsp->ls_dkg.dkg_pcyl *
	    lsp->ls_dkg.dkg_nsect * lsp->ls_dkg.dkg_nhead;
}
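/*
 * Example (illustrative): with the geometry faked above for a 30 MB image
 * (pcyl = 102, nsect = 602, nhead = 1), p_size comes out to
 * 102 * 602 * 1 = 61404 sectors, an integral number of cylinders, which
 * keeps newfs/mkfs from warning about a partition that ends mid-cylinder.
 */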
/*
 * build dk_cinfo - see dkio(7I)
 */
static void
fake_disk_info(dev_t dev, struct dk_cinfo *ci)
{
	bzero(ci, sizeof (struct dk_cinfo));
	(void) strlcpy(ci->dki_cname, LOFI_DRIVER_NAME, sizeof (ci->dki_cname));
	ci->dki_ctype = DKC_SCSI_CCS;
	(void) strlcpy(ci->dki_dname, LOFI_DRIVER_NAME, sizeof (ci->dki_dname));
	ci->dki_unit = LOFI_MINOR2ID(getminor(dev));
	ci->dki_partition = LOFI_PART(getminor(dev));
	/*
	 * newfs uses this to set maxcontig. Must not be < 16, or it
	 * will be 0 when newfs multiplies it by DEV_BSIZE and divides
	 * it by the block size. Then tunefs doesn't work because
	 * it can't find the original newfs maxcontig.
	 */
	ci->dki_maxtransfer = 16;
}
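/*
 * Arithmetic behind the comment above (illustrative): newfs computes
 * maxcontig as dki_maxtransfer * DEV_BSIZE / fs_bsize. With the usual
 * 8192-byte filesystem block, 16 * 512 / 8192 = 1; anything under 16
 * would truncate to 0 in the integer division.
 */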
/*
 * map in a compressed file
 *
 * Read in the header and the index that follows.
 *
 * The header is as follows -
 *
 * Signature (name of the compression algorithm)
 * Compression segment size (a multiple of 512)
 * Number of index entries
 * Size of the last block
 * The array containing the index entries
 *
 * The header information is always stored in
 * network byte order on disk.
 */
static int
lofi_map_compressed_file(struct lofi_state *lsp, char *buf)
{
	uint32_t index_sz, header_len, i;
	ssize_t	resid;
	enum uio_rw rw;
	char *tbuf = buf;
	int error;

	/* The signature has already been read */
	tbuf += sizeof (lsp->ls_comp_algorithm);
	bcopy(tbuf, &(lsp->ls_uncomp_seg_sz), sizeof (lsp->ls_uncomp_seg_sz));
	lsp->ls_uncomp_seg_sz = ntohl(lsp->ls_uncomp_seg_sz);

	/*
	 * The compressed segment size must be a power of 2
	 */
	if (lsp->ls_uncomp_seg_sz < DEV_BSIZE ||
	    !ISP2(lsp->ls_uncomp_seg_sz))
		return (EINVAL);

	for (i = 0; !((lsp->ls_uncomp_seg_sz >> i) & 1); i++)
		;

	lsp->ls_comp_seg_shift = i;

	tbuf += sizeof (lsp->ls_uncomp_seg_sz);
	bcopy(tbuf, &(lsp->ls_comp_index_sz), sizeof (lsp->ls_comp_index_sz));
	lsp->ls_comp_index_sz = ntohl(lsp->ls_comp_index_sz);

	tbuf += sizeof (lsp->ls_comp_index_sz);
	bcopy(tbuf, &(lsp->ls_uncomp_last_seg_sz),
	    sizeof (lsp->ls_uncomp_last_seg_sz));
	lsp->ls_uncomp_last_seg_sz = ntohl(lsp->ls_uncomp_last_seg_sz);

	/*
	 * Compute the total size of the uncompressed data
	 * for use in fake_disk_geometry and other calculations.
	 * Disk geometry has to be faked with respect to the
	 * actual uncompressed data size rather than the
	 * compressed file size.
	 */
	lsp->ls_vp_size =
	    (u_offset_t)(lsp->ls_comp_index_sz - 2) * lsp->ls_uncomp_seg_sz
	    + lsp->ls_uncomp_last_seg_sz;

	/*
	 * Index size is rounded up to DEV_BSIZE for ease
	 * of segmapping
	 */
	index_sz = sizeof (*lsp->ls_comp_seg_index) * lsp->ls_comp_index_sz;
	header_len = sizeof (lsp->ls_comp_algorithm) +
	    sizeof (lsp->ls_uncomp_seg_sz) +
	    sizeof (lsp->ls_comp_index_sz) +
	    sizeof (lsp->ls_uncomp_last_seg_sz);
	lsp->ls_comp_offbase = header_len + index_sz;

	index_sz += header_len;
	index_sz = roundup(index_sz, DEV_BSIZE);

	lsp->ls_comp_index_data = kmem_alloc(index_sz, KM_SLEEP);
	lsp->ls_comp_index_data_sz = index_sz;

	/*
	 * Read in the index -- this has a side-effect
	 * of reading in the header as well
	 */
	rw = UIO_READ;
	error = vn_rdwr(rw, lsp->ls_vp, lsp->ls_comp_index_data, index_sz,
	    0, UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred, &resid);
	if (error != 0)
		return (error);

	/* Skip the header, this is where the index really begins */
	lsp->ls_comp_seg_index =
	    /*LINTED*/
	    (uint64_t *)(lsp->ls_comp_index_data + header_len);

	/*
	 * Now recompute offsets in the index to account for
	 * the header length
	 */
	for (i = 0; i < lsp->ls_comp_index_sz; i++) {
		lsp->ls_comp_seg_index[i] = lsp->ls_comp_offbase +
		    BE_64(lsp->ls_comp_seg_index[i]);
	}

	return (error);
}
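/*
 * Sketch of how the index built above is consumed (illustrative; the real
 * lookup lives in the strategy path): for an uncompressed offset "off",
 * the segment number is off >> ls_comp_seg_shift, and the compressed
 * bytes for segment i then live in the file at
 * [ls_comp_seg_index[i], ls_comp_seg_index[i + 1]), which is why the loop
 * above rebases every entry by ls_comp_offbase once at map time.
 */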
static int
lofi_init_crypto(struct lofi_state *lsp, struct lofi_ioctl *klip)
{
	struct crypto_meta chead;
	char buf[DEV_BSIZE];
	char *marker;
	ssize_t	resid;
	uint32_t i;
	int error;
	int ret;

	if (!klip->li_crypto_enabled)
		return (0);

	/*
	 * All current algorithms have a max of 448 bits.
	 */
	if (klip->li_iv_len > CRYPTO_BITS2BYTES(512))
		return (EINVAL);

	if (CRYPTO_BITS2BYTES(klip->li_key_len) > sizeof (klip->li_key))
		return (EINVAL);

	lsp->ls_crypto_enabled = klip->li_crypto_enabled;

	mutex_init(&lsp->ls_crypto_lock, NULL, MUTEX_DRIVER, NULL);

	lsp->ls_mech.cm_type = crypto_mech2id(klip->li_cipher);
	if (lsp->ls_mech.cm_type == CRYPTO_MECH_INVALID) {
		cmn_err(CE_WARN, "invalid cipher %s requested for %s",
		    klip->li_cipher, klip->li_filename);
		return (EINVAL);
	}

	/* this is just initialization here */
	lsp->ls_mech.cm_param = NULL;
	lsp->ls_mech.cm_param_len = 0;

	lsp->ls_iv_type = klip->li_iv_type;
	lsp->ls_iv_mech.cm_type = crypto_mech2id(klip->li_iv_cipher);
	if (lsp->ls_iv_mech.cm_type == CRYPTO_MECH_INVALID) {
		cmn_err(CE_WARN, "invalid iv cipher %s requested"
		    " for %s", klip->li_iv_cipher, klip->li_filename);
		return (EINVAL);
	}

	/* iv mech must itself take a null iv */
	lsp->ls_iv_mech.cm_param = NULL;
	lsp->ls_iv_mech.cm_param_len = 0;
	lsp->ls_iv_len = klip->li_iv_len;

	/*
	 * Create ctx using li_cipher & the raw li_key after checking
	 * that it isn't a weak key.
	 */
	lsp->ls_key.ck_format = CRYPTO_KEY_RAW;
	lsp->ls_key.ck_length = klip->li_key_len;
	lsp->ls_key.ck_data = kmem_alloc(
	    CRYPTO_BITS2BYTES(lsp->ls_key.ck_length), KM_SLEEP);
	bcopy(klip->li_key, lsp->ls_key.ck_data,
	    CRYPTO_BITS2BYTES(lsp->ls_key.ck_length));

	ret = crypto_key_check(&lsp->ls_mech, &lsp->ls_key);
	if (ret != CRYPTO_SUCCESS) {
		cmn_err(CE_WARN, "weak key check failed for cipher "
		    "%s on file %s (0x%x)", klip->li_cipher,
		    klip->li_filename, ret);
		return (EINVAL);
	}

	error = vn_rdwr(UIO_READ, lsp->ls_vp, buf, DEV_BSIZE,
	    CRYOFF, UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred, &resid);
	if (error != 0)
		return (error);

	/*
	 * This is the case where the header in the lofi image is already
	 * initialized to indicate it is encrypted.
	 */
	if (strncmp(buf, lofi_crypto_magic, sizeof (lofi_crypto_magic)) == 0) {
		/*
		 * The encryption header information is laid out this way:
		 *	6 bytes:	hex "CFLOFI"
		 *	2 bytes:	version = 0 ... for now
		 *	96 bytes:	reserved1 (not implemented yet)
		 *	4 bytes:	data_sector = 2 ... for now
		 *	more...		not implemented yet
		 */
		marker = buf;

		/* copy the magic */
		bcopy(marker, lsp->ls_crypto.magic,
		    sizeof (lsp->ls_crypto.magic));
		marker += sizeof (lsp->ls_crypto.magic);

		/* read the encryption version number */
		bcopy(marker, &(lsp->ls_crypto.version),
		    sizeof (lsp->ls_crypto.version));
		lsp->ls_crypto.version = ntohs(lsp->ls_crypto.version);
		marker += sizeof (lsp->ls_crypto.version);

		/* read a chunk of reserved data */
		bcopy(marker, lsp->ls_crypto.reserved1,
		    sizeof (lsp->ls_crypto.reserved1));
		marker += sizeof (lsp->ls_crypto.reserved1);

		/* read block number where encrypted data begins */
		bcopy(marker, &(lsp->ls_crypto.data_sector),
		    sizeof (lsp->ls_crypto.data_sector));
		lsp->ls_crypto.data_sector = ntohl(lsp->ls_crypto.data_sector);
		marker += sizeof (lsp->ls_crypto.data_sector);

		/* and ignore the rest until it is implemented */

		lsp->ls_crypto_offset = lsp->ls_crypto.data_sector * DEV_BSIZE;
		return (0);
	}

	/*
	 * We've requested encryption, but no magic was found, so it must be
	 * a new image.
	 */
	for (i = 0; i < sizeof (struct crypto_meta); i++) {
		if (buf[i] != '\0')
			return (EINVAL);
	}

	marker = buf;
	bcopy(lofi_crypto_magic, marker, sizeof (lofi_crypto_magic));
	marker += sizeof (lofi_crypto_magic);
	chead.version = htons(LOFI_CRYPTO_VERSION);
	bcopy(&(chead.version), marker, sizeof (chead.version));
	marker += sizeof (chead.version);
	marker += sizeof (chead.reserved1);
	chead.data_sector = htonl(LOFI_CRYPTO_DATA_SECTOR);
	bcopy(&(chead.data_sector), marker, sizeof (chead.data_sector));

	/* write the header */
	error = vn_rdwr(UIO_WRITE, lsp->ls_vp, buf, DEV_BSIZE,
	    CRYOFF, UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred, &resid);
	if (error != 0)
		return (error);

	/* fix things up so it looks like we read this info */
	bcopy(lofi_crypto_magic, lsp->ls_crypto.magic,
	    sizeof (lofi_crypto_magic));
	lsp->ls_crypto.version = LOFI_CRYPTO_VERSION;
	lsp->ls_crypto.data_sector = LOFI_CRYPTO_DATA_SECTOR;
	lsp->ls_crypto_offset = lsp->ls_crypto.data_sector * DEV_BSIZE;
	return (0);
}
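/*
 * Byte layout implied by the header above (illustrative): the magic
 * occupies file offsets CRYOFF .. CRYOFF + 5, the version the next two
 * bytes, reserved1 the following 96, and data_sector the four after that.
 * With data_sector = 2, the encrypted payload starts 2 * DEV_BSIZE = 1024
 * bytes into the image, which is what ls_crypto_offset records.
 */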
/*
 * Check to see if the passed in signature is a valid one. If it is
 * valid, return the index into lofi_compress_table.
 *
 * Return -1 if it is invalid
 */
static int
lofi_compress_select(const char *signature)
{
	int i;

	for (i = 0; i < LOFI_COMPRESS_FUNCTIONS; i++) {
		if (strcmp(lofi_compress_table[i].l_name, signature) == 0)
			return (i);
	}
	return (-1);
}
static int
lofi_init_compress(struct lofi_state *lsp)
{
	char buf[DEV_BSIZE];
	int compress_index;
	ssize_t	resid;
	int error;

	error = vn_rdwr(UIO_READ, lsp->ls_vp, buf, DEV_BSIZE, 0, UIO_SYSSPACE,
	    0, RLIM64_INFINITY, kcred, &resid);
	if (error != 0)
		return (error);

	if ((compress_index = lofi_compress_select(buf)) == -1)
		return (0);

	/* compression and encryption are mutually exclusive */
	if (lsp->ls_crypto_enabled)
		return (ENOTSUP);

	/* initialize compression info for compressed lofi */
	lsp->ls_comp_algorithm_index = compress_index;
	(void) strlcpy(lsp->ls_comp_algorithm,
	    lofi_compress_table[compress_index].l_name,
	    sizeof (lsp->ls_comp_algorithm));

	/* Finally setup per-thread pre-allocated buffers */
	lsp->ls_comp_bufs = kmem_zalloc(lofi_taskq_nthreads *
	    sizeof (struct compbuf), KM_SLEEP);

	return (lofi_map_compressed_file(lsp, buf));
}
/*
 * Allocate new or proposed id from lofi_id.
 *
 * Special cases for proposed id:
 * 0: not allowed, 0 is id for control device.
 * -1: allocate first usable id from lofi_id.
 * any other value is proposed value from userland
 *
 * returns DDI_SUCCESS or errno.
 */
static int
lofi_alloc_id(int *idp)
{
	int id, error = DDI_SUCCESS;

	if (*idp == -1) {
		id = id_allocff_nosleep(lofi_id);
		if (id == -1)
			return (EAGAIN);
	} else if (*idp == 0) {
		return (EINVAL);
	} else if (*idp > ((1 << (L_BITSMINOR - LOFI_CMLB_SHIFT)) - 1)) {
		return (ERANGE);
	} else {
		if (ddi_get_soft_state(lofi_statep, *idp) != NULL)
			return (EEXIST);

		id = id_alloc_specific_nosleep(lofi_id, *idp);
		if (id == -1)
			return (EAGAIN);
	}
	*idp = id;

	return (error);
}
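/*
 * Example of the id/minor split (illustrative): with LOFI_CMLB_SHIFT bits
 * reserved at the bottom of the minor number, instance id N maps to minor
 * N << LOFI_CMLB_SHIFT and the low bits select the cmlb partition/slice,
 * which is why the range check above caps proposed ids at
 * (1 << (L_BITSMINOR - LOFI_CMLB_SHIFT)) - 1.
 */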
static int
lofi_create_dev(struct lofi_ioctl *klip)
{
	dev_info_t *parent, *child;
	struct lofi_state *lsp = NULL;
	char namebuf[MAXNAMELEN];
	int error, circ;

	/* get control device */
	lsp = ddi_get_soft_state(lofi_statep, 0);
	parent = ddi_get_parent(lsp->ls_dip);

	if ((error = lofi_alloc_id((int *)&klip->li_id)))
		return (error);

	(void) snprintf(namebuf, sizeof (namebuf), LOFI_DRIVER_NAME "@%d",
	    klip->li_id);

	ndi_devi_enter(parent, &circ);
	child = ndi_devi_findchild(parent, namebuf);
	ndi_devi_exit(parent, circ);

	if (child == NULL) {
		child = ddi_add_child(parent, LOFI_DRIVER_NAME,
		    (pnode_t)DEVI_SID_NODEID, klip->li_id);
		if ((error = ddi_prop_update_int(DDI_DEV_T_NONE, child,
		    "instance", klip->li_id)) != DDI_PROP_SUCCESS)
			goto err;

		if (klip->li_labeled == B_TRUE) {
			if ((error = ddi_prop_create(DDI_DEV_T_NONE, child,
			    DDI_PROP_CANSLEEP, "labeled", 0, 0))
			    != DDI_PROP_SUCCESS)
				goto err;
		}

		if ((error = ndi_devi_online(child, NDI_ONLINE_ATTACH))
		    != NDI_SUCCESS)
			goto err;
	} else {
		id_free(lofi_id, klip->li_id);
		return (EEXIST);
	}

	return (DDI_SUCCESS);

err:
	ddi_prop_remove_all(child);
	(void) ndi_devi_offline(child, NDI_DEVI_REMOVE);
	id_free(lofi_id, klip->li_id);
	return (error);
}
static void
lofi_create_inquiry(struct lofi_state *lsp, struct scsi_inquiry *inq)
{
	char *p = NULL;

	(void) strlcpy(inq->inq_vid, LOFI_DRIVER_NAME, sizeof (inq->inq_vid));

	mutex_enter(&lsp->ls_vp_lock);
	if (lsp->ls_vp != NULL)
		p = strrchr(lsp->ls_vp->v_path, '/');
	if (p != NULL)
		(void) strncpy(inq->inq_pid, p + 1, sizeof (inq->inq_pid));
	mutex_exit(&lsp->ls_vp_lock);
	(void) strlcpy(inq->inq_revision, "1.0", sizeof (inq->inq_revision));
}
/*
 * copy devlink name from event cache
 */
static void
lofi_copy_devpath(struct lofi_ioctl *klip)
{
	int error;
	char namebuf[MAXNAMELEN], *str;
	clock_t ticks;
	nvlist_t *nvl = NULL;

	if (klip->li_labeled == B_TRUE)
		klip->li_devpath[0] = '\0';
	else {
		/* no need to wait for messages */
		(void) snprintf(klip->li_devpath, sizeof (klip->li_devpath),
		    "/dev/" LOFI_CHAR_NAME "/%d", klip->li_id);
		return;
	}

	(void) snprintf(namebuf, sizeof (namebuf), "%d", klip->li_id);
	ticks = ddi_get_lbolt() + LOFI_TIMEOUT * drv_usectohz(1000000);

	mutex_enter(&lofi_chan_lock);
	while (nvlist_lookup_nvlist(lofi_devlink_cache, namebuf, &nvl) != 0) {
		error = cv_timedwait(&lofi_chan_cv, &lofi_chan_lock, ticks);
		if (error == -1)
			break;
	}

	if (nvl != NULL) {
		if (nvlist_lookup_string(nvl, DEV_NAME, &str) == 0) {
			(void) strlcpy(klip->li_devpath, str,
			    sizeof (klip->li_devpath));
		}
	}
	mutex_exit(&lofi_chan_lock);
}
/*
 * map a file to a minor number. Return the minor number.
 */
static int
lofi_map_file(dev_t dev, struct lofi_ioctl *ulip, int pickminor,
    int *rvalp, struct cred *credp, int ioctl_flag)
{
	int	id = -1;
	struct lofi_state *lsp = NULL;
	struct lofi_ioctl *klip;
	int	error;
	struct vnode *vp = NULL;
	vattr_t	vattr;
	int	flag;
	char	namebuf[MAXNAMELEN];

	error = copy_in_lofi_ioctl(ulip, &klip, ioctl_flag);
	if (error != 0)
		return (error);

	mutex_enter(&lofi_lock);

	if (file_to_lofi_nocheck(klip->li_filename, klip->li_readonly,
	    NULL) == 0) {
		error = EBUSY;
		goto err;
	}

	flag = FREAD | FWRITE | FOFFMAX | FEXCL;
	error = vn_open(klip->li_filename, UIO_SYSSPACE, flag, 0, &vp, 0, 0);
	if (error) {
		/* try read-only */
		flag &= ~FWRITE;
		error = vn_open(klip->li_filename, UIO_SYSSPACE, flag, 0,
		    &vp, 0, 0);
		if (error)
			goto err;
	}

	if (!V_ISLOFIABLE(vp->v_type)) {
		error = EINVAL;
		goto err;
	}

	vattr.va_mask = AT_SIZE;
	error = VOP_GETATTR(vp, &vattr, 0, credp, NULL);
	if (error)
		goto err;

	/* the file needs to be a multiple of the block size */
	if ((vattr.va_size % DEV_BSIZE) != 0) {
		error = EINVAL;
		goto err;
	}

	if (pickminor) {
		klip->li_id = (uint32_t)-1;
	}
	if ((error = lofi_create_dev(klip)) != 0)
		goto err;

	id = klip->li_id;
	lsp = ddi_get_soft_state(lofi_statep, id);
	if (lsp == NULL)
		goto err;

	/*
	 * from this point lofi_destroy() is used to clean up on error
	 * make sure the basic data is set
	 */
	lsp->ls_dev = makedevice(getmajor(dev), LOFI_ID2MINOR(id));

	list_create(&lsp->ls_comp_cache, sizeof (struct lofi_comp_cache),
	    offsetof(struct lofi_comp_cache, lc_list));

	/*
	 * save open mode so file can be closed properly and vnode counts
	 * updated correctly.
	 */
	lsp->ls_openflag = flag;

	lsp->ls_vp = vp;
	lsp->ls_stacked_vp = vp;

	lsp->ls_vp_size = vattr.va_size;
	lsp->ls_vp_comp_size = lsp->ls_vp_size;

	/*
	 * Try to handle stacked lofs vnodes.
	 */
	if (vp->v_type == VREG) {
		vnode_t *realvp;

		if (VOP_REALVP(vp, &realvp, NULL) == 0) {
			/*
			 * We need to use the realvp for uniqueness
			 * checking, but keep the stacked vp for
			 * LOFI_GET_FILENAME display.
			 */
			VN_HOLD(realvp);
			lsp->ls_vp = realvp;
		}
	}

	lsp->ls_lbshift = highbit(DEV_BSIZE) - 1;
	lsp->ls_pbshift = lsp->ls_lbshift;

	lsp->ls_readonly = klip->li_readonly;
	lsp->ls_uncomp_seg_sz = 0;
	lsp->ls_comp_algorithm[0] = '\0';
	lsp->ls_crypto_offset = 0;

	(void) snprintf(namebuf, sizeof (namebuf), "%s_taskq_%d",
	    LOFI_DRIVER_NAME, id);
	lsp->ls_taskq = taskq_create_proc(namebuf, lofi_taskq_nthreads,
	    minclsyspri, 1, lofi_taskq_maxalloc, curzone->zone_zsched, 0);

	if ((error = lofi_init_crypto(lsp, klip)) != 0)
		goto err;

	if ((error = lofi_init_compress(lsp)) != 0)
		goto err;

	fake_disk_geometry(lsp);

	if ((ddi_prop_update_int64(lsp->ls_dev, lsp->ls_dip, SIZE_PROP_NAME,
	    lsp->ls_vp_size - lsp->ls_crypto_offset)) != DDI_PROP_SUCCESS) {
		error = EINVAL;
		goto err;
	}

	if ((ddi_prop_update_int64(lsp->ls_dev, lsp->ls_dip, NBLOCKS_PROP_NAME,
	    (lsp->ls_vp_size - lsp->ls_crypto_offset) / DEV_BSIZE))
	    != DDI_PROP_SUCCESS) {
		error = EINVAL;
		goto err;
	}

	list_insert_tail(&lofi_list, lsp);
	/*
	 * Notify we are ready to rock.
	 */
	mutex_enter(&lsp->ls_vp_lock);
	lsp->ls_vp_ready = B_TRUE;
	cv_broadcast(&lsp->ls_vp_cv);
	mutex_exit(&lsp->ls_vp_lock);
	mutex_exit(&lofi_lock);

	lofi_copy_devpath(klip);

	if (rvalp)
		*rvalp = id;
	(void) copy_out_lofi_ioctl(klip, ulip, ioctl_flag);
	free_lofi_ioctl(klip);
	return (0);

err:
	if (lsp != NULL) {
		lofi_destroy(lsp, credp);
	} else if (vp != NULL) {
		(void) VOP_PUTPAGE(vp, 0, 0, B_INVAL, credp, NULL);
		(void) VOP_CLOSE(vp, flag, 1, 0, credp, NULL);
		VN_RELE(vp);
	}

	mutex_exit(&lofi_lock);
	free_lofi_ioctl(klip);
	return (error);
}
static int
lofi_unmap_file(struct lofi_ioctl *ulip, int byfilename,
    struct cred *credp, int ioctl_flag)
{
	struct lofi_state *lsp;
	struct lofi_ioctl *klip;
	nvlist_t *nvl = NULL;
	clock_t ticks;
	char name[MAXNAMELEN];
	int err;

	err = copy_in_lofi_ioctl(ulip, &klip, ioctl_flag);
	if (err != 0)
		return (err);

	mutex_enter(&lofi_lock);
	if (byfilename) {
		if ((err = file_to_lofi(klip->li_filename, klip->li_readonly,
		    &lsp)) != 0) {
			mutex_exit(&lofi_lock);
			free_lofi_ioctl(klip);
			return (err);
		}
	} else if (klip->li_id == 0) {
		mutex_exit(&lofi_lock);
		free_lofi_ioctl(klip);
		return (ENXIO);
	} else {
		lsp = ddi_get_soft_state(lofi_statep, klip->li_id);
	}

	if (lsp == NULL || lsp->ls_vp == NULL || lofi_access(lsp) != 0) {
		mutex_exit(&lofi_lock);
		free_lofi_ioctl(klip);
		return (ENXIO);
	}

	klip->li_id = LOFI_MINOR2ID(getminor(lsp->ls_dev));

	/*
	 * If it's still held open, we'll do one of three things:
	 *
	 * If no flag is set, just return EBUSY.
	 *
	 * If the 'cleanup' flag is set, unmap and remove the device when
	 * the last user finishes.
	 *
	 * If the 'force' flag is set, then we forcibly close the underlying
	 * file. Subsequent operations will fail, and the DKIOCSTATE ioctl
	 * will return DKIO_DEV_GONE. When the device is last closed, the
	 * device will be cleaned up appropriately.
	 *
	 * This is complicated by the fact that we may have outstanding
	 * dispatched I/Os. Rather than having a single mutex to serialize all
	 * I/O, we keep a count of the number of outstanding I/O requests
	 * (ls_vp_iocount), as well as a flag to indicate that no new I/Os
	 * should be dispatched (ls_vp_closereq).
	 *
	 * We set the flag, wait for the number of outstanding I/Os to reach 0,
	 * and then close the underlying vnode.
	 */
	if (is_opened(lsp)) {
		if (klip->li_force) {
			mutex_enter(&lsp->ls_vp_lock);
			lsp->ls_vp_closereq = B_TRUE;
			/* wake up any threads waiting on dkiocstate */
			cv_broadcast(&lsp->ls_vp_cv);
			while (lsp->ls_vp_iocount > 0)
				cv_wait(&lsp->ls_vp_cv, &lsp->ls_vp_lock);
			mutex_exit(&lsp->ls_vp_lock);

			goto out;
		} else if (klip->li_cleanup) {
			lsp->ls_cleanup = 1;
			mutex_exit(&lofi_lock);
			free_lofi_ioctl(klip);
			return (0);
		}

		mutex_exit(&lofi_lock);
		free_lofi_ioctl(klip);
		return (EBUSY);
	}
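	/*
	 * Summary of the unmap policy above: a mapping that is not held
	 * open is torn down immediately; li_cleanup defers the teardown
	 * to last close; li_force revokes the file out from under any
	 * remaining opens, letting DKIOCSTATE waiters observe
	 * DKIO_DEV_GONE.
	 */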
out:
	lofi_destroy(lsp, credp);

	/*
	 * Check the lofi_devlink_cache to see if the device is really gone.
	 * Note: we just wait for the timeout here and don't return an error
	 * if the timer expires. This check tries to ensure the unmap is
	 * really done by the time lofiadm -d completes.
	 * Since lofi_lock is held, concurrent lofiadm -a calls hopefully
	 * won't interfere with the unmap.
	 */
	(void) snprintf(name, sizeof (name), "%d", klip->li_id);
	ticks = ddi_get_lbolt() + LOFI_TIMEOUT * drv_usectohz(1000000);
	mutex_enter(&lofi_chan_lock);
	while (nvlist_lookup_nvlist(lofi_devlink_cache, name, &nvl) == 0) {
		err = cv_timedwait(&lofi_chan_cv, &lofi_chan_lock, ticks);
		if (err == -1)
			break;
	}
	mutex_exit(&lofi_chan_lock);

	mutex_exit(&lofi_lock);
	(void) copy_out_lofi_ioctl(klip, ulip, ioctl_flag);
	free_lofi_ioctl(klip);
	return (0);
}
/*
 * get the filename given the minor number, or the minor number given
 * the name.
 */
static int
lofi_get_info(dev_t dev, struct lofi_ioctl *ulip, int which,
    struct cred *credp, int ioctl_flag)
{
	struct lofi_ioctl *klip;
	struct lofi_state *lsp;
	int	error;

	error = copy_in_lofi_ioctl(ulip, &klip, ioctl_flag);
	if (error != 0)
		return (error);

	switch (which) {
	case LOFI_GET_FILENAME:
		if (klip->li_id == 0) {
			free_lofi_ioctl(klip);
			return (EINVAL);
		}

		mutex_enter(&lofi_lock);
		lsp = ddi_get_soft_state(lofi_statep, klip->li_id);
		if (lsp == NULL || lofi_access(lsp) != 0) {
			mutex_exit(&lofi_lock);
			free_lofi_ioctl(klip);
			return (ENXIO);
		}

		/*
		 * This may fail if, for example, we're trying to look
		 * up a zoned NFS path from the global zone.
		 */
		if (vnodetopath(NULL, lsp->ls_stacked_vp, klip->li_filename,
		    sizeof (klip->li_filename), CRED()) != 0) {
			(void) strlcpy(klip->li_filename, "?",
			    sizeof (klip->li_filename));
		}

		klip->li_readonly = lsp->ls_readonly;
		klip->li_labeled = lsp->ls_cmlbhandle != NULL;

		(void) strlcpy(klip->li_algorithm, lsp->ls_comp_algorithm,
		    sizeof (klip->li_algorithm));
		klip->li_crypto_enabled = lsp->ls_crypto_enabled;
		mutex_exit(&lofi_lock);

		lofi_copy_devpath(klip);
		error = copy_out_lofi_ioctl(klip, ulip, ioctl_flag);
		free_lofi_ioctl(klip);
		return (error);
	case LOFI_GET_MINOR:
		mutex_enter(&lofi_lock);
		error = file_to_lofi(klip->li_filename,
		    klip->li_readonly, &lsp);
		if (error != 0) {
			mutex_exit(&lofi_lock);
			free_lofi_ioctl(klip);
			return (error);
		}
		klip->li_id = LOFI_MINOR2ID(getminor(lsp->ls_dev));

		klip->li_readonly = lsp->ls_readonly;
		klip->li_labeled = lsp->ls_cmlbhandle != NULL;
		mutex_exit(&lofi_lock);

		lofi_copy_devpath(klip);
		error = copy_out_lofi_ioctl(klip, ulip, ioctl_flag);

		free_lofi_ioctl(klip);
		return (error);
	case LOFI_CHECK_COMPRESSED:
		mutex_enter(&lofi_lock);
		error = file_to_lofi(klip->li_filename,
		    klip->li_readonly, &lsp);
		if (error != 0) {
			mutex_exit(&lofi_lock);
			free_lofi_ioctl(klip);
			return (error);
		}

		klip->li_id = LOFI_MINOR2ID(getminor(lsp->ls_dev));
		(void) strlcpy(klip->li_algorithm, lsp->ls_comp_algorithm,
		    sizeof (klip->li_algorithm));

		mutex_exit(&lofi_lock);
		error = copy_out_lofi_ioctl(klip, ulip, ioctl_flag);
		free_lofi_ioctl(klip);
		return (error);
	default:
		free_lofi_ioctl(klip);
		return (EINVAL);
	}
}
static int
uscsi_is_inquiry(intptr_t arg, int flag, union scsi_cdb *cdb,
    struct uscsi_cmd *uscmd)
{
#ifdef	_MULTI_DATAMODEL
	switch (ddi_model_convert_from(flag & FMODELS)) {
	case DDI_MODEL_ILP32: {
		struct uscsi_cmd32 ucmd32;

		if (ddi_copyin((void *)arg, &ucmd32, sizeof (ucmd32), flag)) {
			return (EFAULT);
		}
		uscsi_cmd32touscsi_cmd((&ucmd32), uscmd);
		break;
	}
	case DDI_MODEL_NONE:
		if (ddi_copyin((void *)arg, uscmd, sizeof (*uscmd), flag)) {
			return (EFAULT);
		}
		break;
	default:
		return (EFAULT);
	}
#else
	if (ddi_copyin((void *)arg, uscmd, sizeof (*uscmd), flag)) {
		return (EFAULT);
	}
#endif	/* _MULTI_DATAMODEL */
	if (ddi_copyin(uscmd->uscsi_cdb, cdb, uscmd->uscsi_cdblen, flag)) {
		return (EFAULT);
	}
	if (cdb->scc_cmd == SCMD_INQUIRY) {
		return (0);
	}
	return (EINVAL);
}
static int
lofi_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *credp,
    int *rvalp)
{
	int error;
	int id;
	enum dkio_state dkstate;
	struct lofi_state *lsp;

	id = LOFI_MINOR2ID(getminor(dev));

	/* lofi ioctls only apply to the master device */
	if (id == 0) {
		struct lofi_ioctl *lip = (struct lofi_ioctl *)arg;

		/*
		 * the query commands only need read access - i.e., normal
		 * users are allowed to issue those on the ctl device as
		 * long as they can open it read-only.
		 */
		switch (cmd) {
		case LOFI_MAP_FILE:
			if ((flag & FWRITE) == 0)
				return (EPERM);
			return (lofi_map_file(dev, lip, 1, rvalp, credp, flag));
		case LOFI_MAP_FILE_MINOR:
			if ((flag & FWRITE) == 0)
				return (EPERM);
			return (lofi_map_file(dev, lip, 0, rvalp, credp, flag));
		case LOFI_UNMAP_FILE:
			if ((flag & FWRITE) == 0)
				return (EPERM);
			return (lofi_unmap_file(lip, 1, credp, flag));
		case LOFI_UNMAP_FILE_MINOR:
			if ((flag & FWRITE) == 0)
				return (EPERM);
			return (lofi_unmap_file(lip, 0, credp, flag));
		case LOFI_GET_FILENAME:
			return (lofi_get_info(dev, lip, LOFI_GET_FILENAME,
			    credp, flag));
		case LOFI_GET_MINOR:
			return (lofi_get_info(dev, lip, LOFI_GET_MINOR,
			    credp, flag));

		/*
		 * This API made limited sense when this value was fixed
		 * at LOFI_MAX_FILES. However, its use to iterate
		 * across all possible devices in lofiadm means we don't
		 * want to return L_MAXMIN, but the highest
		 * *allocated* id.
		 */
		case LOFI_GET_MAXMINOR:
			id = 0;

			mutex_enter(&lofi_lock);

			for (lsp = list_head(&lofi_list); lsp != NULL;
			    lsp = list_next(&lofi_list, lsp)) {
				int i;

				if (lofi_access(lsp) != 0)
					continue;

				i = ddi_get_instance(lsp->ls_dip);
				if (i > id)
					id = i;
			}

			mutex_exit(&lofi_lock);

			error = ddi_copyout(&id, &lip->li_id,
			    sizeof (lip->li_id), flag);
			if (error)
				return (EFAULT);
			return (0);

		case LOFI_CHECK_COMPRESSED:
			return (lofi_get_info(dev, lip, LOFI_CHECK_COMPRESSED,
			    credp, flag));
		default:
			return (EINVAL);
		}
	}

	mutex_enter(&lofi_lock);
	lsp = ddi_get_soft_state(lofi_statep, id);
	if (lsp == NULL || lsp->ls_vp_closereq) {
		mutex_exit(&lofi_lock);
		return (ENXIO);
	}
	mutex_exit(&lofi_lock);

	if (ddi_prop_exists(DDI_DEV_T_ANY, lsp->ls_dip, DDI_PROP_DONTPASS,
	    "labeled") == 1) {
		error = cmlb_ioctl(lsp->ls_cmlbhandle, dev, cmd, arg, flag,
		    credp, rvalp, 0);
		if (error != ENOTTY)
			return (error);
	}

	/*
	 * We explicitly allow DKIOCSTATE, but all other ioctls should fail
	 * with EIO as if the device were no longer present.
	 */
	if (lsp->ls_vp == NULL && cmd != DKIOCSTATE)
		return (EIO);

	/* these are for faking out utilities like newfs */
	switch (cmd) {
	case DKIOCGMEDIAINFO:
	case DKIOCGMEDIAINFOEXT: {
		struct dk_minfo_ext media_info;
		int shift = lsp->ls_lbshift;
		int size;

		if (cmd == DKIOCGMEDIAINFOEXT) {
			media_info.dki_pbsize = 1U << lsp->ls_pbshift;
			size = sizeof (struct dk_minfo_ext);
		} else {
			size = sizeof (struct dk_minfo);
		}

		media_info.dki_media_type = DK_FIXED_DISK;
		media_info.dki_lbsize = 1U << shift;
		media_info.dki_capacity =
		    (lsp->ls_vp_size - lsp->ls_crypto_offset) >> shift;

		if (ddi_copyout(&media_info, (void *)arg, size, flag))
			return (EFAULT);
		return (0);
	}
	case DKIOCREMOVABLE: {
		int i = 0;

		if (ddi_copyout(&i, (caddr_t)arg, sizeof (int), flag))
			return (EFAULT);
		return (0);
	}

	case DKIOCGVTOC: {
		struct vtoc vt;

		fake_disk_vtoc(lsp, &vt);

		switch (ddi_model_convert_from(flag & FMODELS)) {
		case DDI_MODEL_ILP32: {
			struct vtoc32 vtoc32;

			vtoctovtoc32(vt, vtoc32);
			if (ddi_copyout(&vtoc32, (void *)arg,
			    sizeof (struct vtoc32), flag))
				return (EFAULT);
			break;
		}

		case DDI_MODEL_NONE:
			if (ddi_copyout(&vt, (void *)arg,
			    sizeof (struct vtoc), flag))
				return (EFAULT);
			break;
		}
		return (0);
	}
	case DKIOCINFO: {
		struct dk_cinfo ci;

		fake_disk_info(dev, &ci);
		if (ddi_copyout(&ci, (void *)arg, sizeof (ci), flag))
			return (EFAULT);
		return (0);
	}
	case DKIOCG_VIRTGEOM:
	case DKIOCG_PHYGEOM:
	case DKIOCGGEOM:
		error = ddi_copyout(&lsp->ls_dkg, (void *)arg,
		    sizeof (struct dk_geom), flag);
		if (error)
			return (EFAULT);
		return (0);
	case DKIOCSTATE:
		/*
		 * Normally, lofi devices are always in the INSERTED state. If
		 * a device is forcefully unmapped, then the device transitions
		 * to the DKIO_DEV_GONE state.
		 */
		if (ddi_copyin((void *)arg, &dkstate, sizeof (dkstate),
		    flag) != 0)
			return (EFAULT);

		mutex_enter(&lsp->ls_vp_lock);
		lsp->ls_vp_iocount++;
		while (((dkstate == DKIO_INSERTED && lsp->ls_vp != NULL) ||
		    (dkstate == DKIO_DEV_GONE && lsp->ls_vp == NULL)) &&
		    !lsp->ls_vp_closereq) {
			/*
			 * By virtue of having the device open, we know that
			 * 'lsp' will remain valid when we return.
			 */
			if (!cv_wait_sig(&lsp->ls_vp_cv,
			    &lsp->ls_vp_lock)) {
				lsp->ls_vp_iocount--;
				cv_broadcast(&lsp->ls_vp_cv);
				mutex_exit(&lsp->ls_vp_lock);
				return (EINTR);
			}
		}

		dkstate = (!lsp->ls_vp_closereq && lsp->ls_vp != NULL ?
		    DKIO_INSERTED : DKIO_DEV_GONE);
		lsp->ls_vp_iocount--;
		cv_broadcast(&lsp->ls_vp_cv);
		mutex_exit(&lsp->ls_vp_lock);

		if (ddi_copyout(&dkstate, (void *)arg,
		    sizeof (dkstate), flag) != 0)
			return (EFAULT);
		return (0);
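	/*
	 * The DKIOCSTATE handshake above mirrors removable-media semantics:
	 * the caller passes in the state it last saw, the ioctl blocks while
	 * that state is still current, and ls_vp_iocount/ls_vp_cv let a
	 * forced unmap drain these sleepers before the vnode is closed.
	 */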
	case DKIOCUSCSICMD: {
		struct uscsi_cmd uscmd;
		union scsi_cdb cdb;

		if (uscsi_is_inquiry(arg, flag, &cdb, &uscmd) == 0) {
			struct scsi_inquiry inq = {0};

			lofi_create_inquiry(lsp, &inq);
			if (ddi_copyout(&inq, uscmd.uscsi_bufaddr,
			    uscmd.uscsi_buflen, flag) != 0)
				return (EFAULT);
			return (0);
		} else if (cdb.scc_cmd == SCMD_READ_CAPACITY) {
			struct scsi_capacity capacity;

			capacity.capacity =
			    BE_32((lsp->ls_vp_size - lsp->ls_crypto_offset) >>
			    lsp->ls_lbshift);
			capacity.lbasize = BE_32(1 << lsp->ls_lbshift);
			if (ddi_copyout(&capacity, uscmd.uscsi_bufaddr,
			    uscmd.uscsi_buflen, flag) != 0)
				return (EFAULT);
			return (0);
		}

		uscmd.uscsi_rqstatus = 0xff;
#ifdef	_MULTI_DATAMODEL
		switch (ddi_model_convert_from(flag & FMODELS)) {
		case DDI_MODEL_ILP32: {
			struct uscsi_cmd32 ucmd32;

			uscsi_cmdtouscsi_cmd32((&uscmd), (&ucmd32));
			if (ddi_copyout(&ucmd32, (void *)arg, sizeof (ucmd32),
			    flag) != 0)
				return (EFAULT);
			break;
		}
		case DDI_MODEL_NONE:
			if (ddi_copyout(&uscmd, (void *)arg, sizeof (uscmd),
			    flag) != 0)
				return (EFAULT);
			break;
		default:
			return (EFAULT);
		}
#else
		if (ddi_copyout(&uscmd, (void *)arg, sizeof (uscmd), flag) != 0)
			return (EFAULT);
#endif	/* _MULTI_DATAMODEL */
		return (EIO);
	}
	default:
#ifdef DEBUG
		cmn_err(CE_WARN, "lofi_ioctl: %d is not implemented\n", cmd);
#endif	/* DEBUG */
		return (ENOTTY);
	}
}
static int
lofi_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int mod_flags,
    char *name, caddr_t valuep, int *lengthp)
{
	struct lofi_state *lsp;

	lsp = ddi_get_soft_state(lofi_statep, ddi_get_instance(dip));
	if (lsp == NULL) {
		return (ddi_prop_op(dev, dip, prop_op, mod_flags,
		    name, valuep, lengthp));
	}

	return (cmlb_prop_op(lsp->ls_cmlbhandle, dev, dip, prop_op, mod_flags,
	    name, valuep, lengthp, LOFI_PART(getminor(dev)), NULL));
}
static struct cb_ops lofi_cb_ops = {
	lofi_open,		/* open */
	lofi_close,		/* close */
	lofi_strategy,		/* strategy */
	nodev,			/* print */
	nodev,			/* dump */
	lofi_read,		/* read */
	lofi_write,		/* write */
	lofi_ioctl,		/* ioctl */
	nodev,			/* devmap */
	nodev,			/* mmap */
	nodev,			/* segmap */
	nochpoll,		/* poll */
	lofi_prop_op,		/* prop_op */
	0,			/* streamtab  */
	D_64BIT | D_NEW | D_MP,	/* Driver compatibility flag */
};
static struct dev_ops lofi_ops = {
	DEVO_REV,		/* devo_rev, */
	0,			/* refcnt  */
	lofi_info,		/* info */
	nulldev,		/* identify */
	nulldev,		/* probe */
	lofi_attach,		/* attach */
	lofi_detach,		/* detach */
	nodev,			/* reset */
	&lofi_cb_ops,		/* driver operations */
	NULL,			/* no bus operations */
	NULL,			/* power */
	ddi_quiesce_not_needed,	/* quiesce */
};

static struct modldrv modldrv = {
	&mod_driverops,
	"loopback file driver",
	&lofi_ops,
};

static struct modlinkage modlinkage = {
	MODREV_1,
	&modldrv,
	NULL
};
int
_init(void)
{
	int error;

	list_create(&lofi_list, sizeof (struct lofi_state),
	    offsetof(struct lofi_state, ls_list));

	error = ddi_soft_state_init((void **)&lofi_statep,
	    sizeof (struct lofi_state), 0);
	if (error) {
		list_destroy(&lofi_list);
		return (error);
	}

	/*
	 * The minor number is stored as id << LOFI_CMLB_SHIFT as
	 * we need to reserve space for cmlb minor numbers.
	 * This will leave out 4096 id values on 32bit kernel, which should
	 * still suffice.
	 */
	lofi_id = id_space_create("lofi_id", 1,
	    (1 << (L_BITSMINOR - LOFI_CMLB_SHIFT)));
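	/*
	 * Illustrative arithmetic for the reservation above: on a kernel
	 * where L_BITSMINOR is 18 and LOFI_CMLB_SHIFT is 6 (values assumed
	 * here for the example), the id space spans 1 .. (1 << 12) - 1, so
	 * shifting ids left by 6 keeps each instance's 64 cmlb minors from
	 * colliding with the next instance's.
	 */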
	if (lofi_id == NULL) {
		ddi_soft_state_fini((void **)&lofi_statep);
		list_destroy(&lofi_list);
		return (DDI_FAILURE);
	}

	mutex_init(&lofi_lock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&lofi_chan_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&lofi_chan_cv, NULL, CV_DRIVER, NULL);
	error = nvlist_alloc(&lofi_devlink_cache, NV_UNIQUE_NAME, KM_SLEEP);

	if (error == 0)
		error = mod_install(&modlinkage);
	if (error) {
		id_space_destroy(lofi_id);
		if (lofi_devlink_cache != NULL)
			nvlist_free(lofi_devlink_cache);
		mutex_destroy(&lofi_chan_lock);
		cv_destroy(&lofi_chan_cv);
		mutex_destroy(&lofi_lock);
		ddi_soft_state_fini((void **)&lofi_statep);
		list_destroy(&lofi_list);
	}

	return (error);
}
int
_fini(void)
{
	int	error;

	mutex_enter(&lofi_lock);

	if (!list_is_empty(&lofi_list)) {
		mutex_exit(&lofi_lock);
		return (EBUSY);
	}

	mutex_exit(&lofi_lock);

	error = mod_remove(&modlinkage);
	if (error)
		return (error);

	mutex_enter(&lofi_chan_lock);
	nvlist_free(lofi_devlink_cache);
	lofi_devlink_cache = NULL;
	mutex_exit(&lofi_chan_lock);

	mutex_destroy(&lofi_chan_lock);
	cv_destroy(&lofi_chan_cv);
	mutex_destroy(&lofi_lock);
	id_space_destroy(lofi_id);
	ddi_soft_state_fini((void **)&lofi_statep);
	list_destroy(&lofi_list);

	return (error);
}
int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}