/*
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 */
/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
 * Copyright (c) 2013, Joyent, Inc. All rights reserved.
 * Copyright 2017 RackTop Systems.
 */
38 #include <sys/processor.h>
39 #include <sys/zfs_context.h>
40 #include <sys/rrwlock.h>
42 #include <sys/utsname.h>
43 #include <sys/systeminfo.h>
/* Task queue setup and teardown routines; they are defined outside this file. */
extern void system_taskq_init(void);
extern void system_taskq_fini(void);

/*
 * Emulation of kernel services in userland.
 */

/*
 * Stand-in for the root vnode.  The value is an arbitrary non-NULL
 * constant; vn_openat() asserts that its startvp argument equals it.
 */
vnode_t *rootdir = (vnode_t *)0xabcd1234;
/* Host ID formatted as a decimal string; written by kernel_init(). */
char hw_serial[HW_HOSTID_LEN];
/* Starts out NULL; nothing in the visible part of this file assigns it. */
vmem_t *zio_arena = NULL;

/* If set, all blocks read will be copied to the specified directory. */
char *vn_dumpdir = NULL;

/* Fixed identification strings reported for the userland environment. */
struct utsname utsname = {
	"userland", "libzpool", "1", "1", "na"
};
66 * =========================================================================
68 * =========================================================================
/*
 * Note: for the xxxat() versions of these functions, we assume that the
 * starting vp is always rootdir (which is true for spa_directory.c, the only
 * ZFS consumer of these interfaces).  We assert that this is true, and then
 * emulate them by adding '/' in front of the path.
 */
79 vn_open(char *path
, int x1
, int flags
, int mode
, vnode_t
**vpp
, int x2
, int x3
)
85 char realpath
[MAXPATHLEN
];
89 * If we're accessing a real disk from userland, we need to use
90 * the character interface to avoid caching. This is particularly
91 * important if we're trying to look at a real in-kernel storage
92 * pool from userland, e.g. via zdb, because otherwise we won't
93 * see the changes occurring under the segmap cache.
94 * On the other hand, the stupid character device returns zero
95 * for its size. So -- gag -- we open the block device to get
96 * its size, and remember it for subsequent VOP_GETATTR().
98 if (strncmp(path
, "/dev/", 5) == 0) {
100 fd
= open64(path
, O_RDONLY
);
103 if (fstat64(fd
, &st
) == -1) {
108 (void) sprintf(realpath
, "%s", path
);
109 dsk
= strstr(path
, "/dsk/");
111 (void) sprintf(realpath
+ (dsk
- path
) + 1, "r%s",
114 (void) sprintf(realpath
, "%s", path
);
115 if (!(flags
& FCREAT
) && stat64(realpath
, &st
) == -1)
120 old_umask
= umask(0);
123 * The construct 'flags - FREAD' conveniently maps combinations of
124 * FREAD and FWRITE to the corresponding O_RDONLY, O_WRONLY, and O_RDWR.
126 fd
= open64(realpath
, flags
- FREAD
, mode
);
129 (void) umask(old_umask
);
131 if (vn_dumpdir
!= NULL
) {
132 char dumppath
[MAXPATHLEN
];
133 (void) snprintf(dumppath
, sizeof (dumppath
),
134 "%s/%s", vn_dumpdir
, basename(realpath
));
135 dump_fd
= open64(dumppath
, O_CREAT
| O_WRONLY
, 0666);
145 if (fstat64(fd
, &st
) == -1) {
150 (void) fcntl(fd
, F_SETFD
, FD_CLOEXEC
);
152 *vpp
= vp
= umem_zalloc(sizeof (vnode_t
), UMEM_NOFAIL
);
155 vp
->v_size
= st
.st_size
;
156 vp
->v_path
= spa_strdup(path
);
157 vp
->v_dump_fd
= dump_fd
;
164 vn_openat(char *path
, int x1
, int flags
, int mode
, vnode_t
**vpp
, int x2
,
165 int x3
, vnode_t
*startvp
, int fd
)
167 char *realpath
= umem_alloc(strlen(path
) + 2, UMEM_NOFAIL
);
170 ASSERT(startvp
== rootdir
);
171 (void) sprintf(realpath
, "/%s", path
);
173 /* fd ignored for now, need if want to simulate nbmand support */
174 ret
= vn_open(realpath
, x1
, flags
, mode
, vpp
, x2
, x3
);
176 umem_free(realpath
, strlen(path
) + 2);
183 vn_rdwr(int uio
, vnode_t
*vp
, void *addr
, ssize_t len
, offset_t offset
,
184 int x1
, int x2
, rlim64_t x3
, void *x4
, ssize_t
*residp
)
186 ssize_t iolen
, split
;
188 if (uio
== UIO_READ
) {
189 iolen
= pread64(vp
->v_fd
, addr
, len
, offset
);
190 if (vp
->v_dump_fd
!= -1) {
192 pwrite64(vp
->v_dump_fd
, addr
, iolen
, offset
);
193 ASSERT(status
!= -1);
197 * To simulate partial disk writes, we split writes into two
198 * system calls so that the process can be killed in between.
200 int sectors
= len
>> SPA_MINBLOCKSHIFT
;
201 split
= (sectors
> 0 ? rand() % sectors
: 0) <<
203 iolen
= pwrite64(vp
->v_fd
, addr
, split
, offset
);
204 iolen
+= pwrite64(vp
->v_fd
, (char *)addr
+ split
,
205 len
- split
, offset
+ split
);
211 *residp
= len
- iolen
;
212 else if (iolen
!= len
)
218 vn_close(vnode_t
*vp
)
221 if (vp
->v_dump_fd
!= -1)
222 close(vp
->v_dump_fd
);
223 spa_strfree(vp
->v_path
);
224 umem_free(vp
, sizeof (vnode_t
));
228 * At a minimum we need to update the size since vdev_reopen()
229 * will no longer call vn_openat().
232 fop_getattr(vnode_t
*vp
, vattr_t
*vap
)
236 if (fstat64(vp
->v_fd
, &st
) == -1) {
241 vap
->va_size
= st
.st_size
;
/*
 * =========================================================================
 * Figure out which debugging statements to print
 * =========================================================================
 */
/* Comma-separated list of names for which debug output is wanted. */
static char *dprintf_string;
/* Non-zero when every debug statement should be printed. */
static int dprintf_print_all;

/*
 * Report whether 'string' appears as a complete entry in dprintf_string.
 *
 * String format: file1.c,function_name1,file2.c,file3.c
 *
 * An entry matches only when it is followed by ',' or by the end of the
 * list, so a proper prefix of an entry does not count.  Returns 1 on a
 * match and 0 otherwise, including when dprintf_string is NULL.
 */
int
dprintf_find_string(const char *string)
{
	int len = strlen(string);
	char *entry;

	for (entry = dprintf_string; entry != NULL; ) {
		char *comma;

		if (strncmp(entry, string, len) == 0 &&
		    (entry[len] == ',' || entry[len] == '\0'))
			return (1);

		/* Step to the entry after the next comma, if there is one. */
		comma = strchr(entry, ',');
		entry = (comma != NULL) ? comma + 1 : NULL;
	}

	return (0);
}
279 dprintf_setup(int *argc
, char **argv
)
284 * Debugging can be specified two ways: by setting the
285 * environment variable ZFS_DEBUG, or by including a
286 * "debug=..." argument on the command line. The command
287 * line setting overrides the environment variable.
290 for (i
= 1; i
< *argc
; i
++) {
291 int len
= strlen("debug=");
292 /* First look for a command line argument */
293 if (strncmp("debug=", argv
[i
], len
) == 0) {
294 dprintf_string
= argv
[i
] + len
;
295 /* Remove from args */
296 for (j
= i
; j
< *argc
; j
++)
303 if (dprintf_string
== NULL
) {
304 /* Look for ZFS_DEBUG environment variable */
305 dprintf_string
= getenv("ZFS_DEBUG");
309 * Are we just turning on all debugging?
311 if (dprintf_find_string("on"))
312 dprintf_print_all
= 1;
314 if (dprintf_string
!= NULL
)
315 zfs_flags
|= ZFS_DEBUG_DPRINTF
;
319 * =========================================================================
321 * =========================================================================
324 __dprintf(const char *file
, const char *func
, int line
, const char *fmt
, ...)
330 * Get rid of annoying "../common/" prefix to filename.
332 newfile
= strrchr(file
, '/');
333 if (newfile
!= NULL
) {
334 newfile
= newfile
+ 1; /* Get rid of leading / */
339 if (dprintf_print_all
||
340 dprintf_find_string(newfile
) ||
341 dprintf_find_string(func
)) {
342 /* Print out just the function name if requested */
344 if (dprintf_find_string("pid"))
345 (void) printf("%d ", getpid());
346 if (dprintf_find_string("tid"))
347 (void) printf("%u ", thr_self());
348 if (dprintf_find_string("cpu"))
349 (void) printf("%u ", getcpuid());
350 if (dprintf_find_string("time"))
351 (void) printf("%llu ", gethrtime());
352 if (dprintf_find_string("long"))
353 (void) printf("%s, line %d: ", newfile
, line
);
354 (void) printf("%s: ", func
);
356 (void) vprintf(fmt
, adx
);
362 #endif /* ZFS_DEBUG */
365 * =========================================================================
367 * =========================================================================
370 kobj_open_file(char *name
)
375 /* set vp as the _fd field of the file */
376 if (vn_openat(name
, UIO_SYSSPACE
, FREAD
, 0, &vp
, 0, 0, rootdir
,
378 return ((void *)-1UL);
380 file
= umem_zalloc(sizeof (struct _buf
), UMEM_NOFAIL
);
381 file
->_fd
= (intptr_t)vp
;
386 kobj_read_file(struct _buf
*file
, char *buf
, unsigned size
, unsigned off
)
390 vn_rdwr(UIO_READ
, (vnode_t
*)file
->_fd
, buf
, size
, (offset_t
)off
,
391 UIO_SYSSPACE
, 0, 0, 0, &resid
);
393 return (size
- resid
);
397 kobj_close_file(struct _buf
*file
)
399 vn_close((vnode_t
*)file
->_fd
);
400 umem_free(file
, sizeof (struct _buf
));
404 kobj_get_filesize(struct _buf
*file
, uint64_t *size
)
407 vnode_t
*vp
= (vnode_t
*)file
->_fd
;
409 if (fstat64(vp
->v_fd
, &st
) == -1) {
/*
 * =========================================================================
 * kernel emulation setup & teardown
 * =========================================================================
 */
/*
 * Callback registered with umem_nofail_callback(): report the condition
 * on stderr and abort.  write() is used directly on the descriptor
 * rather than going through stdio.
 */
static int
umem_out_of_memory(void)
{
	static const char errmsg[] = "out of memory -- generating core dump\n";

	/* sizeof - 1 so the terminating NUL is not written to stderr. */
	(void) write(fileno(stderr), errmsg, sizeof (errmsg) - 1);
	abort();
	return (0);
}
/*
 * Set up the emulated kernel environment.
 *
 * mode is tested for FWRITE when choosing the host ID string and is also
 * passed on to spa_init().
 */
void
kernel_init(int mode)
{
	extern uint_t rrw_tsd_key;

	/* Route failures of UMEM_NOFAIL allocations to our handler. */
	umem_nofail_callback(umem_out_of_memory);

	/* Number of physical memory pages, as reported by sysconf(). */
	physmem = sysconf(_SC_PHYS_PAGES);

	dprintf("physmem = %llu pages (%.2f GB)\n", physmem,
	    (double)physmem * sysconf(_SC_PAGE_SIZE) / (1ULL << 30));

	/* The real host ID is recorded only with FWRITE; otherwise "0". */
	(void) snprintf(hw_serial, sizeof (hw_serial), "%ld",
	    (mode & FWRITE) ? gethostid() : 0);

	system_taskq_init();

	mutex_init(&cpu_lock, NULL, MUTEX_DEFAULT, NULL);

	spa_init(mode);

	/* Create the thread-specific-data key identified by rrw_tsd_key. */
	tsd_create(&rrw_tsd_key, rrw_tsd_destroy);
}
465 z_uncompress(void *dst
, size_t *dstlen
, const void *src
, size_t srclen
)
468 uLongf len
= *dstlen
;
470 if ((ret
= uncompress(dst
, &len
, src
, srclen
)) == Z_OK
)
471 *dstlen
= (size_t)len
;
477 z_compress_level(void *dst
, size_t *dstlen
, const void *src
, size_t srclen
,
481 uLongf len
= *dstlen
;
483 if ((ret
= compress2(dst
, &len
, src
, srclen
, level
)) == Z_OK
)
484 *dstlen
= (size_t)len
;
490 zfs_secpolicy_snapshot_perms(const char *name
, cred_t
*cr
)
496 zfs_secpolicy_rename_perms(const char *from
, const char *to
, cred_t
*cr
)
502 zfs_secpolicy_destroy_perms(const char *name
, cred_t
*cr
)
509 zfs_onexit_fd_hold(int fd
, minor_t
*minorp
)
517 zfs_onexit_fd_rele(int fd
)
523 zfs_onexit_add_cb(minor_t minor
, void (*func
)(void *), void *data
,
524 uint64_t *action_handle
)
531 zfs_onexit_del_cb(minor_t minor
, uint64_t action_handle
, boolean_t fire
)
538 zfs_onexit_cb_data(minor_t minor
, uint64_t action_handle
, void **data
)
546 bzero(bp
, sizeof (buf_t
));
552 if (bp
->b_iodone
!= NULL
) {
553 (*(bp
->b_iodone
))(bp
);
556 ASSERT((bp
->b_flags
& B_DONE
) == 0);
557 bp
->b_flags
|= B_DONE
;
561 bioerror(buf_t
*bp
, int error
)
567 bp
->b_flags
|= B_ERROR
;
569 bp
->b_flags
&= ~B_ERROR
;
576 geterror(struct buf
*bp
)
580 if (bp
->b_flags
& B_ERROR
) {