4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright (c) 1995, 2010, Oracle and/or its affiliates. All rights reserved.
26 #include <sys/types.h>
27 #include <sys/t_lock.h>
28 #include <sys/param.h>
29 #include <sys/systm.h>
34 #include <sys/sysmacros.h>
36 #include <sys/vfs_opreg.h>
37 #include <sys/vnode.h>
38 #include <sys/debug.h>
39 #include <sys/errno.h>
44 #include <sys/termios.h>
45 #include <sys/stream.h>
46 #include <sys/strsubr.h>
47 #include <sys/strsun.h>
48 #include <sys/esunddi.h>
49 #include <sys/flock.h>
50 #include <sys/modctl.h>
51 #include <sys/cmn_err.h>
52 #include <sys/mkdev.h>
53 #include <sys/pathname.h>
56 #include <sys/fs/snode.h>
57 #include <sys/fs/dv_node.h>
60 #include <sys/socket.h>
61 #include <sys/socketvar.h>
62 #include <netinet/in.h>
64 #include <sys/ucred.h>
66 #include <sys/tiuser.h>
67 #define _SUN_TPI_VERSION 2
68 #include <sys/tihdr.h>
72 #include <fs/sockfs/nl7c.h>
73 #include <fs/sockfs/sockcommon.h>
74 #include <fs/sockfs/sockfilter_impl.h>
75 #include <fs/sockfs/socktpi.h>
76 #include <fs/sockfs/socktpi_impl.h>
77 #include <fs/sockfs/sodirect.h>
80 * Macros that operate on struct cmsghdr.
81 * The CMSG_VALID macro does not assume that the last option buffer is padded.
83 #define CMSG_CONTENT(cmsg) (&((cmsg)[1]))
84 #define CMSG_CONTENTLEN(cmsg) ((cmsg)->cmsg_len - sizeof (struct cmsghdr))
85 #define CMSG_VALID(cmsg, start, end) \
86 (ISALIGNED_cmsghdr(cmsg) && \
87 ((uintptr_t)(cmsg) >= (uintptr_t)(start)) && \
88 ((uintptr_t)(cmsg) < (uintptr_t)(end)) && \
89 ((ssize_t)(cmsg)->cmsg_len >= sizeof (struct cmsghdr)) && \
90 ((uintptr_t)(cmsg) + (cmsg)->cmsg_len <= (uintptr_t)(end)))
91 #define SO_LOCK_WAKEUP_TIME 3000 /* Wakeup time in milliseconds */
93 dev_t sockdev
; /* For fsid in getattr */
94 int sockfs_defer_nl7c_init
= 0;
96 struct socklist socklist
;
98 struct kmem_cache
*socket_cache
;
101 * sockconf_lock protects the socket configuration (socket types and
102 * socket filters) which is changed via the sockconfig system call.
104 krwlock_t sockconf_lock
;
106 static int sockfs_update(kstat_t
*, int);
107 static int sockfs_snapshot(kstat_t
*, void *, int);
108 extern smod_info_t
*sotpi_smod_create(void);
110 extern void sendfile_init();
112 extern void nl7c_init(void);
114 extern int modrootloaded
;
116 #define ADRSTRLEN (2 * sizeof (void *) + 1)
118 * kernel structure for passing the sockinfo data back up to the user.
119 * the strings array allows us to convert AF_UNIX addresses into strings
120 * with a common method regardless of which n-bit kernel we're running.
123 struct sockinfo ks_si
;
124 char ks_straddr
[3][ADRSTRLEN
];
128 * Translate from a device pathname (e.g. "/dev/tcp") to a vnode.
129 * Returns with the vnode held.
132 sogetvp(char *devpath
, vnode_t
**vpp
, int uioflag
)
139 ASSERT(uioflag
== UIO_SYSSPACE
|| uioflag
== UIO_USERSPACE
);
142 * Lookup the underlying filesystem vnode.
144 error
= lookupname(devpath
, uioflag
, FOLLOW
, NULLVPP
, &vp
);
148 /* Check that it is the correct vnode */
149 if (vp
->v_type
!= VCHR
) {
155 * If devpath went through devfs, the device should already
156 * be configured. If devpath is a mknod file, however, we
157 * need to make sure the device is properly configured.
158 * To do this, we do something similar to spec_open()
159 * except that we resolve to the minor/leaf level since
160 * we need to return a vnode.
162 csp
= VTOS(VTOS(vp
)->s_commonvp
);
163 if (!(csp
->s_flag
& SDIPSET
)) {
164 char *pathname
= kmem_alloc(MAXPATHLEN
, KM_SLEEP
);
165 error
= ddi_dev_pathname(vp
->v_rdev
, S_IFCHR
, pathname
);
167 error
= devfs_lookupname(pathname
, NULLVPP
, &dvp
);
169 kmem_free(pathname
, MAXPATHLEN
);
172 vp
= dvp
; /* use the devfs vp */
175 /* device is configured at this point */
176 maj
= getmajor(vp
->v_rdev
);
177 if (!STREAMSTAB(maj
)) {
187 * Update the accessed, updated, or changed times in an sonode
188 * with the current time.
190 * Note that both SunOS 4.X and 4.4BSD sockets do not present reasonable
191 * attributes in a fstat call. (They return the current time and 0 for
192 * all timestamps, respectively.) We maintain the current timestamps
193 * here primarily so that should sockmod be popped the resulting
194 * file descriptor will behave like a stream w.r.t. the timestamps.
197 so_update_attrs(struct sonode
*so
, int flag
)
199 time_t now
= gethrestime_sec();
201 if (SOCK_IS_NONSTR(so
))
204 mutex_enter(&so
->so_lock
);
207 SOTOTPI(so
)->sti_atime
= now
;
209 SOTOTPI(so
)->sti_mtime
= now
;
210 mutex_exit(&so
->so_lock
);
213 extern so_create_func_t sock_comm_create_function
;
214 extern so_destroy_func_t sock_comm_destroy_function
;
216 * Init function called when sockfs is loaded.
219 sockinit(int fstype
, char *name
)
221 static const fs_operation_def_t sock_vfsops_template
[] = {
228 error
= vfs_setfsops(fstype
, sock_vfsops_template
, NULL
);
230 zcmn_err(GLOBAL_ZONEID
, CE_WARN
,
231 "sockinit: bad vfs ops template");
235 error
= vn_make_ops(name
, socket_vnodeops_template
,
238 err_str
= "sockinit: bad socket vnode ops template";
239 /* vn_make_ops() does not reset socket_vnodeops on failure. */
240 socket_vnodeops
= NULL
;
244 socket_cache
= kmem_cache_create("socket_cache",
245 sizeof (struct sonode
), 0, sonode_constructor
,
246 sonode_destructor
, NULL
, NULL
, NULL
, 0);
248 rw_init(&sockconf_lock
, NULL
, RW_DEFAULT
, NULL
);
250 error
= socktpi_init();
263 * Set up the default create and destroy functions
265 sock_comm_create_function
= socket_sonode_create
;
266 sock_comm_destroy_function
= socket_sonode_destroy
;
269 * Build initial list mapping socket parameters to vnode.
272 smod_add(sotpi_smod_create());
277 * If sockets are needed before init runs /sbin/soconfig
278 * it is possible to preload the sockparams list here using
280 * sockconfig(1,2,3, "/dev/tcp", 0);
284 * Create a unique dev_t for use in so_fsid.
287 if ((dev
= getudev()) == (major_t
)-1)
289 sockdev
= makedevice(dev
, 0);
291 mutex_init(&socklist
.sl_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
293 if (!modrootloaded
) {
294 sockfs_defer_nl7c_init
= 1;
299 /* Initialize socket filters */
305 (void) vfs_freevfsops_by_type(fstype
);
306 if (socket_vnodeops
!= NULL
)
307 vn_freevnodeops(socket_vnodeops
);
309 zcmn_err(GLOBAL_ZONEID
, CE_WARN
, err_str
);
314 * Caller must hold the mutex. Used to set SOLOCKED.
317 so_lock_single(struct sonode
*so
)
319 ASSERT(MUTEX_HELD(&so
->so_lock
));
321 while (so
->so_flag
& (SOLOCKED
| SOASYNC_UNBIND
)) {
322 cv_wait_stop(&so
->so_single_cv
, &so
->so_lock
,
323 SO_LOCK_WAKEUP_TIME
);
325 so
->so_flag
|= SOLOCKED
;
329 * Caller must hold the mutex and pass in SOLOCKED or SOASYNC_UNBIND.
330 * Used to clear SOLOCKED or SOASYNC_UNBIND.
333 so_unlock_single(struct sonode
*so
, int flag
)
335 ASSERT(MUTEX_HELD(&so
->so_lock
));
336 ASSERT(flag
& (SOLOCKED
|SOASYNC_UNBIND
));
337 ASSERT((flag
& ~(SOLOCKED
|SOASYNC_UNBIND
)) == 0);
338 ASSERT(so
->so_flag
& flag
);
340 * Process the T_DISCON_IND on sti_discon_ind_mp.
342 * Call to so_drain_discon_ind will result in so_lock
343 * being dropped and re-acquired later.
345 if (!SOCK_IS_NONSTR(so
)) {
346 sotpi_info_t
*sti
= SOTOTPI(so
);
348 if (sti
->sti_discon_ind_mp
!= NULL
)
349 so_drain_discon_ind(so
);
352 cv_signal(&so
->so_single_cv
);
353 so
->so_flag
&= ~flag
;
357 * Caller must hold the mutex. Used to set SOREADLOCKED.
358 * If the caller wants nonblocking behavior it should set fmode.
361 so_lock_read(struct sonode
*so
, int fmode
)
363 ASSERT(MUTEX_HELD(&so
->so_lock
));
365 while (so
->so_flag
& SOREADLOCKED
) {
366 if (fmode
& (FNDELAY
|FNONBLOCK
))
367 return (EWOULDBLOCK
);
368 cv_wait_stop(&so
->so_read_cv
, &so
->so_lock
,
369 SO_LOCK_WAKEUP_TIME
);
371 so
->so_flag
|= SOREADLOCKED
;
376 * Like so_lock_read above but allows signals.
379 so_lock_read_intr(struct sonode
*so
, int fmode
)
381 ASSERT(MUTEX_HELD(&so
->so_lock
));
383 while (so
->so_flag
& SOREADLOCKED
) {
384 if (fmode
& (FNDELAY
|FNONBLOCK
))
385 return (EWOULDBLOCK
);
386 if (!cv_wait_sig(&so
->so_read_cv
, &so
->so_lock
))
389 so
->so_flag
|= SOREADLOCKED
;
394 * Caller must hold the mutex. Used to clear SOREADLOCKED,
395 * set in so_lock_read() or so_lock_read_intr().
398 so_unlock_read(struct sonode
*so
)
400 ASSERT(MUTEX_HELD(&so
->so_lock
));
401 ASSERT(so
->so_flag
& SOREADLOCKED
);
403 cv_signal(&so
->so_read_cv
);
404 so
->so_flag
&= ~SOREADLOCKED
;
408 * Verify that the specified offset falls within the mblk and
409 * that the resulting pointer is aligned.
410 * Returns NULL if not.
413 sogetoff(mblk_t
*mp
, t_uscalar_t offset
,
414 t_uscalar_t length
, uint_t align_size
)
416 uintptr_t ptr1
, ptr2
;
418 ASSERT(mp
&& mp
->b_wptr
>= mp
->b_rptr
);
419 ptr1
= (uintptr_t)mp
->b_rptr
+ offset
;
420 ptr2
= (uintptr_t)ptr1
+ length
;
421 if (ptr1
< (uintptr_t)mp
->b_rptr
|| ptr2
> (uintptr_t)mp
->b_wptr
) {
425 if ((ptr1
& (align_size
- 1)) != 0) {
429 return ((void *)ptr1
);
433 * Return the AF_UNIX underlying filesystem vnode matching a given name.
434 * Makes sure the sending and the destination sonodes are compatible.
435 * The vnode is returned held.
437 * The underlying filesystem VSOCK vnode has a v_stream pointer that
438 * references the actual stream head (hence indirectly the actual sonode).
441 so_ux_lookup(struct sonode
*so
, struct sockaddr_un
*soun
, int checkaccess
,
444 vnode_t
*vp
; /* Underlying filesystem vnode */
445 vnode_t
*rvp
; /* real vnode */
446 vnode_t
*svp
; /* sockfs vnode */
450 dprintso(so
, 1, ("so_ux_lookup(%p) name <%s>\n", (void *)so
,
453 error
= lookupname(soun
->sun_path
, UIO_SYSSPACE
, FOLLOW
, NULLVPP
, &vp
);
455 eprintsoline(so
, error
);
460 * Traverse lofs mounts to get the real vnode
462 if (VOP_REALVP(vp
, &rvp
, NULL
) == 0) {
463 VN_HOLD(rvp
); /* hold the real vnode */
464 VN_RELE(vp
); /* release hold from lookup */
468 if (vp
->v_type
!= VSOCK
) {
470 eprintsoline(so
, error
);
476 * Check that we have permissions to access the destination
477 * vnode. This check is not done in BSD but it is required
480 if (error
= VOP_ACCESS(vp
, VREAD
|VWRITE
, 0, CRED(), NULL
)) {
481 eprintsoline(so
, error
);
487 * Check if the remote socket has been closed.
489 * Synchronize with vn_rele_stream by holding v_lock while traversing
490 * v_stream->sd_vnode.
492 mutex_enter(&vp
->v_lock
);
493 if (vp
->v_stream
== NULL
) {
494 mutex_exit(&vp
->v_lock
);
495 if (so
->so_type
== SOCK_DGRAM
)
496 error
= EDESTADDRREQ
;
498 error
= ECONNREFUSED
;
500 eprintsoline(so
, error
);
503 ASSERT(vp
->v_stream
->sd_vnode
);
504 svp
= vp
->v_stream
->sd_vnode
;
506 * holding v_lock on underlying filesystem vnode and acquiring
507 * it on sockfs vnode. Assumes that no code ever attempts to
508 * acquire these locks in the reverse order.
511 mutex_exit(&vp
->v_lock
);
513 if (svp
->v_type
!= VSOCK
) {
515 eprintsoline(so
, error
);
521 if (so
->so_type
!= so2
->so_type
) {
523 eprintsoline(so
, error
);
539 * Verify peer address for connect and sendto/sendmsg.
540 * Since sendto/sendmsg would not get synchronous errors from the transport
541 * provider we have to do these ugly checks in the socket layer to
542 * preserve compatibility with SunOS 4.X.
545 so_addr_verify(struct sonode
*so
, const struct sockaddr
*name
,
550 dprintso(so
, 1, ("so_addr_verify(%p, %p, %d)\n",
551 (void *)so
, (void *)name
, namelen
));
553 ASSERT(name
!= NULL
);
555 family
= so
->so_family
;
558 if (name
->sa_family
!= family
) {
559 eprintsoline(so
, EAFNOSUPPORT
);
560 return (EAFNOSUPPORT
);
562 if (namelen
!= (socklen_t
)sizeof (struct sockaddr_in
)) {
563 eprintsoline(so
, EINVAL
);
569 struct sockaddr_in6
*sin6
;
572 if (name
->sa_family
!= family
) {
573 eprintsoline(so
, EAFNOSUPPORT
);
574 return (EAFNOSUPPORT
);
576 if (namelen
!= (socklen_t
)sizeof (struct sockaddr_in6
)) {
577 eprintsoline(so
, EINVAL
);
581 /* Verify that apps don't forget to clear sin6_scope_id etc */
582 sin6
= (struct sockaddr_in6
*)name
;
583 if (sin6
->sin6_scope_id
!= 0 &&
584 !IN6_IS_ADDR_LINKSCOPE(&sin6
->sin6_addr
)) {
585 zcmn_err(getzoneid(), CE_WARN
,
586 "connect/send* with uninitialized sin6_scope_id "
587 "(%d) on socket. Pid = %d\n",
588 (int)sin6
->sin6_scope_id
, (int)curproc
->p_pid
);
594 if (SOTOTPI(so
)->sti_faddr_noxlate
) {
597 if (namelen
< (socklen_t
)sizeof (short)) {
598 eprintsoline(so
, ENOENT
);
601 if (name
->sa_family
!= family
) {
602 eprintsoline(so
, EAFNOSUPPORT
);
603 return (EAFNOSUPPORT
);
605 /* MAXPATHLEN + soun_family + nul termination */
606 if (namelen
> (socklen_t
)(MAXPATHLEN
+ sizeof (short) + 1)) {
607 eprintsoline(so
, ENAMETOOLONG
);
608 return (ENAMETOOLONG
);
615 * Default is don't do any length or sa_family check
616 * to allow non-sockaddr style addresses.
626 * Translate an AF_UNIX sockaddr_un to the transport internal name.
627 * Assumes caller has called so_addr_verify first.
631 so_ux_addr_xlate(struct sonode
*so
, struct sockaddr
*name
,
632 socklen_t namelen
, int checkaccess
,
633 void **addrp
, socklen_t
*addrlenp
)
636 struct sockaddr_un
*soun
;
640 sotpi_info_t
*sti
= SOTOTPI(so
);
642 dprintso(so
, 1, ("so_ux_addr_xlate(%p, %p, %d, %d)\n",
643 (void *)so
, (void *)name
, namelen
, checkaccess
));
645 ASSERT(name
!= NULL
);
646 ASSERT(so
->so_family
== AF_UNIX
);
647 ASSERT(!sti
->sti_faddr_noxlate
);
648 ASSERT(namelen
>= (socklen_t
)sizeof (short));
649 ASSERT(name
->sa_family
== AF_UNIX
);
650 soun
= (struct sockaddr_un
*)name
;
652 * Lookup vnode for the specified path name and verify that
655 error
= so_ux_lookup(so
, soun
, checkaccess
, &vp
);
657 eprintsoline(so
, error
);
661 * Use the address of the peer vnode as the address to send
662 * to. We release the peer vnode here. In case it has been
663 * closed by the time the T_CONN_REQ or T_UNIDATA_REQ reaches the
664 * transport the message will get an error or be dropped.
666 sti
->sti_ux_faddr
.soua_vp
= vp
;
667 sti
->sti_ux_faddr
.soua_magic
= SOU_MAGIC_EXPLICIT
;
668 addr
= &sti
->sti_ux_faddr
;
669 addrlen
= (socklen_t
)sizeof (sti
->sti_ux_faddr
);
670 dprintso(so
, 1, ("ux_xlate UNIX: addrlen %d, vp %p\n",
671 addrlen
, (void *)vp
));
674 *addrlenp
= (socklen_t
)addrlen
;
679 * Esballoc free function for messages that contain SO_FILEP option.
680 * Decrement the reference count on the file pointers using closef.
683 fdbuf_free(struct fdbuf
*fdbuf
)
688 dprint(1, ("fdbuf_free: %d fds\n", fdbuf
->fd_numfd
));
689 for (i
= 0; i
< fdbuf
->fd_numfd
; i
++) {
691 * We need pointer size alignment for fd_fds. On a LP64
692 * kernel, the required alignment is 8 bytes while
693 * the option headers and values are only 4 bytes
694 * aligned. So it's safer to do a bcopy compared to
695 * assigning fdbuf->fd_fds[i] to fp.
697 bcopy((char *)&fdbuf
->fd_fds
[i
], (char *)&fp
, sizeof (fp
));
698 dprint(1, ("fdbuf_free: [%d] = %p\n", i
, (void *)fp
));
701 if (fdbuf
->fd_ebuf
!= NULL
)
702 kmem_free(fdbuf
->fd_ebuf
, fdbuf
->fd_ebuflen
);
703 kmem_free(fdbuf
, fdbuf
->fd_size
);
707 * Allocate an esballoc'ed message for AF_UNIX file descriptor passing.
708 * Waits if memory is not available.
711 fdbuf_allocmsg(int size
, struct fdbuf
*fdbuf
)
716 dprint(1, ("fdbuf_allocmsg: size %d, %d fds\n", size
, fdbuf
->fd_numfd
));
717 buf
= kmem_alloc(size
, KM_SLEEP
);
718 fdbuf
->fd_ebuf
= (caddr_t
)buf
;
719 fdbuf
->fd_ebuflen
= size
;
720 fdbuf
->fd_frtn
.free_func
= fdbuf_free
;
721 fdbuf
->fd_frtn
.free_arg
= (caddr_t
)fdbuf
;
723 mp
= esballoc_wait(buf
, size
, BPRI_MED
, &fdbuf
->fd_frtn
);
724 mp
->b_datap
->db_type
= M_PROTO
;
729 * Extract file descriptors from a fdbuf.
730 * Return list in rights/rightslen.
734 fdbuf_extract(struct fdbuf
*fdbuf
, void *rights
, int rightslen
)
741 dprint(1, ("fdbuf_extract: %d fds, len %d\n",
742 fdbuf
->fd_numfd
, rightslen
));
744 numfd
= fdbuf
->fd_numfd
;
745 ASSERT(rightslen
== numfd
* (int)sizeof (int));
748 * Allocate a file descriptor and increment the f_count.
749 * The latter is needed since we always call fdbuf_free
750 * which performs a closef.
753 for (i
= 0; i
< numfd
; i
++) {
754 if ((fd
= ufalloc(0)) == -1)
757 * We need pointer size alignment for fd_fds. On a LP64
758 * kernel, the required alignment is 8 bytes while
759 * the option headers and values are only 4 bytes
760 * aligned. So it's safer to do a bcopy compared to
761 * assigning fdbuf->fd_fds[i] to fp.
763 bcopy((char *)&fdbuf
->fd_fds
[i
], (char *)&fp
, sizeof (fp
));
764 mutex_enter(&fp
->f_tlock
);
766 mutex_exit(&fp
->f_tlock
);
770 audit_fdrecv(fd
, fp
);
771 dprint(1, ("fdbuf_extract: [%d] = %d, %p refcnt %d\n",
772 i
, fd
, (void *)fp
, fp
->f_count
));
778 * Undo whatever partial work the loop above has done.
784 for (j
= 0; j
< i
; j
++) {
786 ("fdbuf_extract: cleanup[%d] = %d\n", j
, *rp
));
787 (void) closeandsetf(*rp
++, NULL
);
795 * Insert file descriptors into an fdbuf.
796 * Returns a kmem_alloc'ed fdbuf. The fdbuf should be freed
797 * by calling fdbuf_free().
800 fdbuf_create(void *rights
, int rightslen
, struct fdbuf
**fdbufp
)
808 dprint(1, ("fdbuf_create: len %d\n", rightslen
));
810 numfd
= rightslen
/ (int)sizeof (int);
812 fdbufsize
= (int)FDBUF_HDRSIZE
+ (numfd
* (int)sizeof (struct file
*));
813 fdbuf
= kmem_alloc(fdbufsize
, KM_SLEEP
);
814 fdbuf
->fd_size
= fdbufsize
;
816 fdbuf
->fd_ebuf
= NULL
;
817 fdbuf
->fd_ebuflen
= 0;
819 for (i
= 0; i
< numfd
; i
++) {
820 if ((fp
= getf(fds
[i
])) == NULL
) {
824 dprint(1, ("fdbuf_create: [%d] = %d, %p refcnt %d\n",
825 i
, fds
[i
], (void *)fp
, fp
->f_count
));
826 mutex_enter(&fp
->f_tlock
);
828 mutex_exit(&fp
->f_tlock
);
830 * The maximum alignment for fdbuf (or any option header
831 * and its value) is 4 bytes. On a LP64 kernel, the alignment
832 * is not sufficient for pointers (fd_fds in this case). Since
833 * we just did a kmem_alloc (we get a double word alignment),
834 * we don't need to do anything on the send side (we lose
835 * the double word alignment because fdbuf goes after an
836 * option header (eg T_unitdata_req) which is only 4 byte
837 * aligned). We take care of this when we extract the file
838 * descriptor in fdbuf_extract or fdbuf_free.
840 fdbuf
->fd_fds
[i
] = fp
;
844 audit_fdsend(fds
[i
], fp
, 0);
851 fdbuf_optlen(int rightslen
)
855 numfd
= rightslen
/ (int)sizeof (int);
857 return ((int)FDBUF_HDRSIZE
+ (numfd
* (int)sizeof (struct file
*)));
861 fdbuf_cmsglen(int fdbuflen
)
863 return (t_uscalar_t
)((fdbuflen
- FDBUF_HDRSIZE
) /
864 (int)sizeof (struct file
*) * (int)sizeof (int));
869 * Return non-zero if the mblk and fdbuf are consistent.
872 fdbuf_verify(mblk_t
*mp
, struct fdbuf
*fdbuf
, int fdbuflen
)
874 if (fdbuflen
>= FDBUF_HDRSIZE
&&
875 fdbuflen
== fdbuf
->fd_size
) {
876 frtn_t
*frp
= mp
->b_datap
->db_frtnp
;
878 * Check that the SO_FILEP portion of the
879 * message has not been modified by
880 * the loopback transport. The sending sockfs generates
881 * a message that is esballoc'ed with the free function
882 * being fdbuf_free() and where free_arg contains the
883 * identical information as the SO_FILEP content.
885 * If any of these constraints are not satisfied we
886 * silently ignore the option.
890 frp
->free_func
== fdbuf_free
&&
891 frp
->free_arg
!= NULL
&&
892 bcmp(frp
->free_arg
, fdbuf
, fdbuflen
) == 0) {
893 dprint(1, ("fdbuf_verify: fdbuf %p len %d\n",
894 (void *)fdbuf
, fdbuflen
));
897 zcmn_err(getzoneid(), CE_WARN
,
898 "sockfs: mismatched fdbuf content (%p)",
903 zcmn_err(getzoneid(), CE_WARN
,
904 "sockfs: mismatched fdbuf len %d, %d\n",
905 fdbuflen
, fdbuf
->fd_size
);
911 * When the file descriptors returned by sorecvmsg can not be passed
912 * to the application this routine will cleanup the references on
913 * the files. Start at startoff bytes into the buffer.
916 close_fds(void *fdbuf
, int fdbuflen
, int startoff
)
918 int *fds
= (int *)fdbuf
;
919 int numfd
= fdbuflen
/ (int)sizeof (int);
922 dprint(1, ("close_fds(%p, %d, %d)\n", fdbuf
, fdbuflen
, startoff
));
924 for (i
= 0; i
< numfd
; i
++) {
927 if (startoff
< (int)sizeof (int)) {
929 * This file descriptor is partially or fully after
933 ("close_fds: cleanup[%d] = %d\n", i
, fds
[i
]));
934 (void) closeandsetf(fds
[i
], NULL
);
936 startoff
-= (int)sizeof (int);
941 * Close all file descriptors contained in the control part starting at
945 so_closefds(void *control
, t_uscalar_t controllen
, int oldflg
,
948 struct cmsghdr
*cmsg
;
954 close_fds(control
, controllen
, startoff
);
957 /* Scan control part for file descriptors. */
958 for (cmsg
= (struct cmsghdr
*)control
;
959 CMSG_VALID(cmsg
, control
, (uintptr_t)control
+ controllen
);
960 cmsg
= CMSG_NEXT(cmsg
)) {
961 if (cmsg
->cmsg_level
== SOL_SOCKET
&&
962 cmsg
->cmsg_type
== SCM_RIGHTS
) {
963 close_fds(CMSG_CONTENT(cmsg
),
964 (int)CMSG_CONTENTLEN(cmsg
),
965 startoff
- (int)sizeof (struct cmsghdr
));
967 startoff
-= cmsg
->cmsg_len
;
972 * Returns a pointer/length for the file descriptors contained
973 * in the control buffer. Returns with *fdlenp == -1 if there are no
974 * file descriptor options present. This is different than there being
975 * a zero-length file descriptor option.
976 * Fail if there are multiple SCM_RIGHTS cmsgs.
979 so_getfdopt(void *control
, t_uscalar_t controllen
, int oldflg
,
980 void **fdsp
, int *fdlenp
)
982 struct cmsghdr
*cmsg
;
986 if (control
== NULL
) {
997 *fdlenp
= controllen
;
998 dprint(1, ("so_getfdopt: old %d\n", *fdlenp
));
1005 for (cmsg
= (struct cmsghdr
*)control
;
1006 CMSG_VALID(cmsg
, control
, (uintptr_t)control
+ controllen
);
1007 cmsg
= CMSG_NEXT(cmsg
)) {
1008 if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1009 cmsg
->cmsg_type
== SCM_RIGHTS
) {
1012 fds
= CMSG_CONTENT(cmsg
);
1013 fdlen
= (int)CMSG_CONTENTLEN(cmsg
);
1014 dprint(1, ("so_getfdopt: new %lu\n",
1015 (size_t)CMSG_CONTENTLEN(cmsg
)));
1019 dprint(1, ("so_getfdopt: NONE\n"));
1028 * Return the length of the options including any file descriptor options.
1031 so_optlen(void *control
, t_uscalar_t controllen
, int oldflg
)
1033 struct cmsghdr
*cmsg
;
1034 t_uscalar_t optlen
= 0;
1037 if (control
== NULL
)
1041 return ((t_uscalar_t
)(sizeof (struct T_opthdr
) +
1042 fdbuf_optlen(controllen
)));
1044 for (cmsg
= (struct cmsghdr
*)control
;
1045 CMSG_VALID(cmsg
, control
, (uintptr_t)control
+ controllen
);
1046 cmsg
= CMSG_NEXT(cmsg
)) {
1047 if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1048 cmsg
->cmsg_type
== SCM_RIGHTS
) {
1049 len
= fdbuf_optlen((int)CMSG_CONTENTLEN(cmsg
));
1051 len
= (t_uscalar_t
)CMSG_CONTENTLEN(cmsg
);
1053 optlen
+= (t_uscalar_t
)(_TPI_ALIGN_TOPT(len
) +
1054 sizeof (struct T_opthdr
));
1056 dprint(1, ("so_optlen: controllen %d, flg %d -> optlen %d\n",
1057 controllen
, oldflg
, optlen
));
1062 * Copy options from control to the mblk. Skip any file descriptor options.
1065 so_cmsg2opt(void *control
, t_uscalar_t controllen
, int oldflg
, mblk_t
*mp
)
1067 struct T_opthdr toh
;
1068 struct cmsghdr
*cmsg
;
1070 if (control
== NULL
)
1074 /* No real options - caller has handled file descriptors */
1077 for (cmsg
= (struct cmsghdr
*)control
;
1078 CMSG_VALID(cmsg
, control
, (uintptr_t)control
+ controllen
);
1079 cmsg
= CMSG_NEXT(cmsg
)) {
1081 * Note: The caller handles file descriptors prior
1082 * to calling this function.
1086 if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1087 cmsg
->cmsg_type
== SCM_RIGHTS
)
1090 len
= (t_uscalar_t
)CMSG_CONTENTLEN(cmsg
);
1091 toh
.level
= cmsg
->cmsg_level
;
1092 toh
.name
= cmsg
->cmsg_type
;
1093 toh
.len
= len
+ (t_uscalar_t
)sizeof (struct T_opthdr
);
1096 soappendmsg(mp
, &toh
, sizeof (toh
));
1097 soappendmsg(mp
, CMSG_CONTENT(cmsg
), len
);
1098 mp
->b_wptr
+= _TPI_ALIGN_TOPT(len
) - len
;
1099 ASSERT(mp
->b_wptr
<= mp
->b_datap
->db_lim
);
1104 * Return the length of the control message derived from the options.
1105 * Exclude SO_SRCADDR and SO_UNIX_CLOSE options. Include SO_FILEP.
1106 * When oldflg is set only include SO_FILEP.
1107 * so_opt2cmsg and so_cmsglen are inter-related since so_cmsglen
1108 * allocates the space that so_opt2cmsg fills. If one changes, the other should
1109 * also be checked for any possible impacts.
1112 so_cmsglen(mblk_t
*mp
, void *opt
, t_uscalar_t optlen
, int oldflg
)
1114 t_uscalar_t cmsglen
= 0;
1115 struct T_opthdr
*tohp
;
1117 t_uscalar_t last_roundup
= 0;
1119 ASSERT(__TPI_TOPT_ISALIGNED(opt
));
1121 for (tohp
= (struct T_opthdr
*)opt
;
1122 tohp
&& _TPI_TOPT_VALID(tohp
, opt
, (uintptr_t)opt
+ optlen
);
1123 tohp
= _TPI_TOPT_NEXTHDR(opt
, optlen
, tohp
)) {
1124 dprint(1, ("so_cmsglen: level 0x%x, name %d, len %d\n",
1125 tohp
->level
, tohp
->name
, tohp
->len
));
1126 if (tohp
->level
== SOL_SOCKET
&&
1127 (tohp
->name
== SO_SRCADDR
||
1128 tohp
->name
== SO_UNIX_CLOSE
)) {
1131 if (tohp
->level
== SOL_SOCKET
&& tohp
->name
== SO_FILEP
) {
1132 struct fdbuf
*fdbuf
;
1135 fdbuf
= (struct fdbuf
*)_TPI_TOPT_DATA(tohp
);
1136 fdbuflen
= (int)_TPI_TOPT_DATALEN(tohp
);
1138 if (!fdbuf_verify(mp
, fdbuf
, fdbuflen
))
1141 cmsglen
+= fdbuf_cmsglen(fdbuflen
);
1144 len
= fdbuf_cmsglen(fdbuflen
);
1145 } else if (tohp
->level
== SOL_SOCKET
&&
1146 tohp
->name
== SCM_TIMESTAMP
) {
1150 if (get_udatamodel() == DATAMODEL_NATIVE
) {
1151 len
= sizeof (struct timeval
);
1153 len
= sizeof (struct timeval32
);
1158 len
= (t_uscalar_t
)_TPI_TOPT_DATALEN(tohp
);
1161 * Exclude roundup for last option to not set
1162 * MSG_CTRUNC when the cmsg fits but the padding doesn't fit.
1164 last_roundup
= (t_uscalar_t
)
1165 (ROUNDUP_cmsglen(len
+ (int)sizeof (struct cmsghdr
)) -
1166 (len
+ (int)sizeof (struct cmsghdr
)));
1167 cmsglen
+= (t_uscalar_t
)(len
+ (int)sizeof (struct cmsghdr
)) +
1170 cmsglen
-= last_roundup
;
1171 dprint(1, ("so_cmsglen: optlen %d, flg %d -> cmsglen %d\n",
1172 optlen
, oldflg
, cmsglen
));
1177 * Copy options from options to the control. Convert SO_FILEP to
1179 * Returns errno or zero.
1180 * so_opt2cmsg and so_cmsglen are inter-related since so_cmsglen
1181 * allocates the space that so_opt2cmsg fills. If one changes, the other should
1182 * also be checked for any possible impacts.
1185 so_opt2cmsg(mblk_t
*mp
, void *opt
, t_uscalar_t optlen
, int oldflg
,
1186 void *control
, t_uscalar_t controllen
)
1188 struct T_opthdr
*tohp
;
1189 struct cmsghdr
*cmsg
;
1190 struct fdbuf
*fdbuf
;
1193 #if defined(DEBUG) || defined(__lint)
1194 struct cmsghdr
*cend
= (struct cmsghdr
*)
1195 (((uint8_t *)control
) + ROUNDUP_cmsglen(controllen
));
1197 cmsg
= (struct cmsghdr
*)control
;
1199 ASSERT(__TPI_TOPT_ISALIGNED(opt
));
1201 for (tohp
= (struct T_opthdr
*)opt
;
1202 tohp
&& _TPI_TOPT_VALID(tohp
, opt
, (uintptr_t)opt
+ optlen
);
1203 tohp
= _TPI_TOPT_NEXTHDR(opt
, optlen
, tohp
)) {
1204 dprint(1, ("so_opt2cmsg: level 0x%x, name %d, len %d\n",
1205 tohp
->level
, tohp
->name
, tohp
->len
));
1207 if (tohp
->level
== SOL_SOCKET
&&
1208 (tohp
->name
== SO_SRCADDR
||
1209 tohp
->name
== SO_UNIX_CLOSE
)) {
1212 ASSERT((uintptr_t)cmsg
<= (uintptr_t)control
+ controllen
);
1213 if (tohp
->level
== SOL_SOCKET
&& tohp
->name
== SO_FILEP
) {
1214 fdbuf
= (struct fdbuf
*)_TPI_TOPT_DATA(tohp
);
1215 fdbuflen
= (int)_TPI_TOPT_DATALEN(tohp
);
1217 if (!fdbuf_verify(mp
, fdbuf
, fdbuflen
))
1220 error
= fdbuf_extract(fdbuf
, control
,
1228 fdlen
= (int)fdbuf_cmsglen(
1229 (int)_TPI_TOPT_DATALEN(tohp
));
1231 cmsg
->cmsg_level
= tohp
->level
;
1232 cmsg
->cmsg_type
= SCM_RIGHTS
;
1233 cmsg
->cmsg_len
= (socklen_t
)(fdlen
+
1234 sizeof (struct cmsghdr
));
1236 error
= fdbuf_extract(fdbuf
,
1237 CMSG_CONTENT(cmsg
), fdlen
);
1241 } else if (tohp
->level
== SOL_SOCKET
&&
1242 tohp
->name
== SCM_TIMESTAMP
) {
1243 timestruc_t
*timestamp
;
1248 cmsg
->cmsg_level
= tohp
->level
;
1249 cmsg
->cmsg_type
= tohp
->name
;
1252 (timestruc_t
*)P2ROUNDUP((intptr_t)&tohp
[1],
1255 if (get_udatamodel() == DATAMODEL_NATIVE
) {
1258 cmsg
->cmsg_len
= sizeof (struct timeval
) +
1259 sizeof (struct cmsghdr
);
1260 tv
.tv_sec
= timestamp
->tv_sec
;
1261 tv
.tv_usec
= timestamp
->tv_nsec
/
1262 (NANOSEC
/ MICROSEC
);
1264 * on LP64 systems, the struct timeval in
1265 * the destination will not be 8-byte aligned,
1266 * so use bcopy to avoid alignment trouble
1268 bcopy(&tv
, CMSG_CONTENT(cmsg
), sizeof (tv
));
1270 struct timeval32
*time32
;
1272 cmsg
->cmsg_len
= sizeof (struct timeval32
) +
1273 sizeof (struct cmsghdr
);
1274 time32
= (struct timeval32
*)CMSG_CONTENT(cmsg
);
1275 time32
->tv_sec
= (time32_t
)timestamp
->tv_sec
;
1277 (int32_t)(timestamp
->tv_nsec
/
1278 (NANOSEC
/ MICROSEC
));
1285 cmsg
->cmsg_level
= tohp
->level
;
1286 cmsg
->cmsg_type
= tohp
->name
;
1287 cmsg
->cmsg_len
= (socklen_t
)(_TPI_TOPT_DATALEN(tohp
) +
1288 sizeof (struct cmsghdr
));
1290 /* copy content to control data part */
1291 bcopy(&tohp
[1], CMSG_CONTENT(cmsg
),
1292 CMSG_CONTENTLEN(cmsg
));
1294 /* move to next CMSG structure! */
1295 cmsg
= CMSG_NEXT(cmsg
);
1297 dprint(1, ("so_opt2cmsg: buf %p len %d; cend %p; final cmsg %p\n",
1298 control
, controllen
, (void *)cend
, (void *)cmsg
));
1299 ASSERT(cmsg
<= cend
);
1304 * Extract the SO_SRCADDR option value if present.
1307 so_getopt_srcaddr(void *opt
, t_uscalar_t optlen
, void **srcp
,
1308 t_uscalar_t
*srclenp
)
1310 struct T_opthdr
*tohp
;
1312 ASSERT(__TPI_TOPT_ISALIGNED(opt
));
1314 ASSERT(srcp
!= NULL
&& srclenp
!= NULL
);
1318 for (tohp
= (struct T_opthdr
*)opt
;
1319 tohp
&& _TPI_TOPT_VALID(tohp
, opt
, (uintptr_t)opt
+ optlen
);
1320 tohp
= _TPI_TOPT_NEXTHDR(opt
, optlen
, tohp
)) {
1321 dprint(1, ("so_getopt_srcaddr: level 0x%x, name %d, len %d\n",
1322 tohp
->level
, tohp
->name
, tohp
->len
));
1323 if (tohp
->level
== SOL_SOCKET
&&
1324 tohp
->name
== SO_SRCADDR
) {
1325 *srcp
= _TPI_TOPT_DATA(tohp
);
1326 *srclenp
= (t_uscalar_t
)_TPI_TOPT_DATALEN(tohp
);
1332 * Verify if the SO_UNIX_CLOSE option is present.
1335 so_getopt_unix_close(void *opt
, t_uscalar_t optlen
)
1337 struct T_opthdr
*tohp
;
1339 ASSERT(__TPI_TOPT_ISALIGNED(opt
));
1341 for (tohp
= (struct T_opthdr
*)opt
;
1342 tohp
&& _TPI_TOPT_VALID(tohp
, opt
, (uintptr_t)opt
+ optlen
);
1343 tohp
= _TPI_TOPT_NEXTHDR(opt
, optlen
, tohp
)) {
1345 ("so_getopt_unix_close: level 0x%x, name %d, len %d\n",
1346 tohp
->level
, tohp
->name
, tohp
->len
));
1347 if (tohp
->level
== SOL_SOCKET
&&
1348 tohp
->name
== SO_UNIX_CLOSE
)
1355 * Allocate an M_PROTO message.
1357 * If allocation fails the behavior depends on sleepflg:
1358 * _ALLOC_NOSLEEP fail immediately
1359 * _ALLOC_INTR sleep for memory until a signal is caught
1360 * _ALLOC_SLEEP sleep forever. Don't return NULL.
1363 soallocproto(size_t size
, int sleepflg
, cred_t
*cr
)
1367 /* Round up size for reuse */
1368 size
= MAX(size
, 64);
1370 mp
= allocb_cred(size
, cr
, curproc
->p_pid
);
1372 mp
= allocb(size
, BPRI_MED
);
1375 int error
; /* Dummy - error not returned to caller */
1380 mp
= allocb_cred_wait(size
, STR_NOSIG
, &error
,
1381 cr
, curproc
->p_pid
);
1383 mp
= allocb_wait(size
, BPRI_MED
, STR_NOSIG
,
1390 mp
= allocb_cred_wait(size
, 0, &error
, cr
,
1393 mp
= allocb_wait(size
, BPRI_MED
, 0, &error
);
1396 /* Caught signal while sleeping for memory */
1397 eprintline(ENOBUFS
);
1401 case _ALLOC_NOSLEEP
:
1403 eprintline(ENOBUFS
);
1407 DB_TYPE(mp
) = M_PROTO
;
1412 * Allocate an M_PROTO message with a single component.
1413 * len is the length of buf. size is the amount to allocate.
1415 * buf can be NULL with a non-zero len.
1416 * This results in a bzero'ed chunk being placed the message.
1419 soallocproto1(const void *buf
, ssize_t len
, ssize_t size
, int sleepflg
,
1427 ASSERT(size
>= len
);
1428 /* Round up size for reuse */
1429 size
= MAX(size
, 64);
1430 mp
= soallocproto(size
, sleepflg
, cr
);
1433 mp
->b_datap
->db_type
= M_PROTO
;
1436 bcopy(buf
, mp
->b_wptr
, len
);
1438 bzero(mp
->b_wptr
, len
);
1445 * Append buf/len to mp.
1446 * The caller has to ensure that there is enough room in the mblk.
1448 * buf can be NULL with a non-zero len.
1449 * This results in a bzero'ed chunk being placed the message.
1452 soappendmsg(mblk_t
*mp
, const void *buf
, ssize_t len
)
1457 /* Assert for room left */
1458 ASSERT(mp
->b_datap
->db_lim
- mp
->b_wptr
>= len
);
1460 bcopy(buf
, mp
->b_wptr
, len
);
1462 bzero(mp
->b_wptr
, len
);
1468 * Create a message using two kernel buffers.
1469 * If size is set that will determine the allocation size (e.g. for future
1470 * soappendmsg calls). If size is zero it is derived from the buffer
1474 soallocproto2(const void *buf1
, ssize_t len1
, const void *buf2
, ssize_t len2
,
1475 ssize_t size
, int sleepflg
, cred_t
*cr
)
1481 ASSERT(size
>= len1
+ len2
);
1483 mp
= soallocproto1(buf1
, len1
, size
, sleepflg
, cr
);
1485 soappendmsg(mp
, buf2
, len2
);
1490 * Create a message using three kernel buffers.
1491 * If size is set that will determine the allocation size (for future
1492 * soappendmsg calls). If size is zero it is derived from the buffer
1496 soallocproto3(const void *buf1
, ssize_t len1
, const void *buf2
, ssize_t len2
,
1497 const void *buf3
, ssize_t len3
, ssize_t size
, int sleepflg
, cred_t
*cr
)
1502 size
= len1
+ len2
+len3
;
1503 ASSERT(size
>= len1
+ len2
+ len3
);
1505 mp
= soallocproto1(buf1
, len1
, size
, sleepflg
, cr
);
1507 soappendmsg(mp
, buf2
, len2
);
1508 soappendmsg(mp
, buf3
, len3
);
1515 pr_state(uint_t state
, uint_t mode
)
1517 static char buf
[1024];
1520 if (state
& SS_ISCONNECTED
)
1521 (void) strcat(buf
, "ISCONNECTED ");
1522 if (state
& SS_ISCONNECTING
)
1523 (void) strcat(buf
, "ISCONNECTING ");
1524 if (state
& SS_ISDISCONNECTING
)
1525 (void) strcat(buf
, "ISDISCONNECTING ");
1526 if (state
& SS_CANTSENDMORE
)
1527 (void) strcat(buf
, "CANTSENDMORE ");
1529 if (state
& SS_CANTRCVMORE
)
1530 (void) strcat(buf
, "CANTRCVMORE ");
1531 if (state
& SS_ISBOUND
)
1532 (void) strcat(buf
, "ISBOUND ");
1533 if (state
& SS_NDELAY
)
1534 (void) strcat(buf
, "NDELAY ");
1535 if (state
& SS_NONBLOCK
)
1536 (void) strcat(buf
, "NONBLOCK ");
1538 if (state
& SS_ASYNC
)
1539 (void) strcat(buf
, "ASYNC ");
1540 if (state
& SS_ACCEPTCONN
)
1541 (void) strcat(buf
, "ACCEPTCONN ");
1542 if (state
& SS_SAVEDEOR
)
1543 (void) strcat(buf
, "SAVEDEOR ");
1545 if (state
& SS_RCVATMARK
)
1546 (void) strcat(buf
, "RCVATMARK ");
1547 if (state
& SS_OOBPEND
)
1548 (void) strcat(buf
, "OOBPEND ");
1549 if (state
& SS_HAVEOOBDATA
)
1550 (void) strcat(buf
, "HAVEOOBDATA ");
1551 if (state
& SS_HADOOBDATA
)
1552 (void) strcat(buf
, "HADOOBDATA ");
1555 (void) strcat(buf
, "PRIV ");
1556 if (mode
& SM_ATOMIC
)
1557 (void) strcat(buf
, "ATOMIC ");
1559 (void) strcat(buf
, "ADDR ");
1560 if (mode
& SM_CONNREQUIRED
)
1561 (void) strcat(buf
, "CONNREQUIRED ");
1563 if (mode
& SM_FDPASSING
)
1564 (void) strcat(buf
, "FDPASSING ");
1565 if (mode
& SM_EXDATA
)
1566 (void) strcat(buf
, "EXDATA ");
1567 if (mode
& SM_OPTDATA
)
1568 (void) strcat(buf
, "OPTDATA ");
1569 if (mode
& SM_BYTESTREAM
)
1570 (void) strcat(buf
, "BYTESTREAM ");
1575 pr_addr(int family
, struct sockaddr
*addr
, t_uscalar_t addrlen
)
1577 static char buf
[1024];
1579 if (addr
== NULL
|| addrlen
== 0) {
1580 (void) sprintf(buf
, "(len %d) %p", addrlen
, (void *)addr
);
1585 struct sockaddr_in sin
;
1587 bcopy(addr
, &sin
, sizeof (sin
));
1589 (void) sprintf(buf
, "(len %d) %x/%d",
1590 addrlen
, ntohl(sin
.sin_addr
.s_addr
), ntohs(sin
.sin_port
));
1594 struct sockaddr_in6 sin6
;
1595 uint16_t *piece
= (uint16_t *)&sin6
.sin6_addr
;
1597 bcopy((char *)addr
, (char *)&sin6
, sizeof (sin6
));
1598 (void) sprintf(buf
, "(len %d) %x:%x:%x:%x:%x:%x:%x:%x/%d",
1600 ntohs(piece
[0]), ntohs(piece
[1]),
1601 ntohs(piece
[2]), ntohs(piece
[3]),
1602 ntohs(piece
[4]), ntohs(piece
[5]),
1603 ntohs(piece
[6]), ntohs(piece
[7]),
1604 ntohs(sin6
.sin6_port
));
1608 struct sockaddr_un
*soun
= (struct sockaddr_un
*)addr
;
1610 (void) sprintf(buf
, "(len %d) %s", addrlen
,
1611 (soun
== NULL
) ? "(none)" : soun
->sun_path
);
1615 (void) sprintf(buf
, "(unknown af %d)", family
);
/*
 * The logical equivalence operator (a if-and-only-if b).
 * Only the truth value of each argument matters, and each argument is
 * evaluated exactly once.
 */
#define	EQUIVALENT(a, b)	(!(a) == !(b))
1625 * Verify limitations and invariants on oob state.
1626 * Return 1 if OK, otherwise 0 so that it can be used as
1627 * ASSERT(verify_oobstate(so));
1630 so_verify_oobstate(struct sonode
*so
)
1634 ASSERT(MUTEX_HELD(&so
->so_lock
));
1637 * The possible state combinations are:
1640 * SS_OOBPEND|SS_HAVEOOBDATA
1641 * SS_OOBPEND|SS_HADOOBDATA
1644 switch (so
->so_state
& (SS_OOBPEND
|SS_HAVEOOBDATA
|SS_HADOOBDATA
)) {
1647 case SS_OOBPEND
|SS_HAVEOOBDATA
:
1648 case SS_OOBPEND
|SS_HADOOBDATA
:
1652 printf("Bad oob state 1 (%p): state %s\n",
1653 (void *)so
, pr_state(so
->so_state
, so
->so_mode
));
1657 /* SS_RCVATMARK should only be set when SS_OOBPEND is set */
1658 if ((so
->so_state
& (SS_RCVATMARK
|SS_OOBPEND
)) == SS_RCVATMARK
) {
1659 printf("Bad oob state 2 (%p): state %s\n",
1660 (void *)so
, pr_state(so
->so_state
, so
->so_mode
));
1665 * (havemark != 0 or SS_RCVATMARK) iff SS_OOBPEND
1666 * For TPI, the presence of a "mark" is indicated by sti_oobsigcnt.
1668 havemark
= (SOCK_IS_NONSTR(so
)) ? so
->so_oobmark
> 0 :
1669 SOTOTPI(so
)->sti_oobsigcnt
> 0;
1671 if (!EQUIVALENT(havemark
|| (so
->so_state
& SS_RCVATMARK
),
1672 so
->so_state
& SS_OOBPEND
)) {
1673 printf("Bad oob state 3 (%p): state %s\n",
1674 (void *)so
, pr_state(so
->so_state
, so
->so_mode
));
1679 * Unless SO_OOBINLINE we have so_oobmsg != NULL iff SS_HAVEOOBDATA
1681 if (!(so
->so_options
& SO_OOBINLINE
) &&
1682 !EQUIVALENT(so
->so_oobmsg
!= NULL
, so
->so_state
& SS_HAVEOOBDATA
)) {
1683 printf("Bad oob state 4 (%p): state %s\n",
1684 (void *)so
, pr_state(so
->so_state
, so
->so_mode
));
1688 if (!SOCK_IS_NONSTR(so
) &&
1689 SOTOTPI(so
)->sti_oobsigcnt
< SOTOTPI(so
)->sti_oobcnt
) {
1690 printf("Bad oob state 5 (%p): counts %d/%d state %s\n",
1691 (void *)so
, SOTOTPI(so
)->sti_oobsigcnt
,
1692 SOTOTPI(so
)->sti_oobcnt
,
1693 pr_state(so
->so_state
, so
->so_mode
));
1702 /* initialize sockfs zone specific kstat related items */
1704 sock_kstat_init(zoneid_t zoneid
)
1708 ksp
= kstat_create_zone("sockfs", 0, "sock_unix_list", "misc",
1709 KSTAT_TYPE_RAW
, 0, KSTAT_FLAG_VAR_SIZE
|KSTAT_FLAG_VIRTUAL
, zoneid
);
1712 ksp
->ks_update
= sockfs_update
;
1713 ksp
->ks_snapshot
= sockfs_snapshot
;
1714 ksp
->ks_lock
= &socklist
.sl_lock
;
1715 ksp
->ks_private
= (void *)(uintptr_t)zoneid
;
1722 /* tear down sockfs zone specific kstat related items */
1725 sock_kstat_fini(zoneid_t zoneid
, void *arg
)
1727 kstat_t
*ksp
= (kstat_t
*)arg
;
1730 ASSERT(zoneid
== (zoneid_t
)(uintptr_t)ksp
->ks_private
);
1737 * Note that nactive is going to be different for each zone.
1738 * This means we require kstat to call sockfs_update and then sockfs_snapshot
1739 * for the same zone, or sockfs_snapshot will be taken into the wrong size
1740 * buffer. This is safe, but if the buffer is too small, user will not be
1741 * given details of all sockets. However, as this kstat has a ks_lock, kstat
1742 * driver will keep it locked between the update and the snapshot, so no
1743 * other process (zone) can currently get inbetween resulting in a wrong size
1744 * buffer allocation.
1747 sockfs_update(kstat_t
*ksp
, int rw
)
1749 uint_t nactive
= 0; /* # of active AF_UNIX sockets */
1750 struct sonode
*so
; /* current sonode on socklist */
1751 zoneid_t myzoneid
= (zoneid_t
)(uintptr_t)ksp
->ks_private
;
1753 ASSERT((zoneid_t
)(uintptr_t)ksp
->ks_private
== getzoneid());
1755 if (rw
== KSTAT_WRITE
) { /* bounce all writes */
1759 for (so
= socklist
.sl_list
; so
!= NULL
; so
= SOTOTPI(so
)->sti_next_so
) {
1760 if (so
->so_count
!= 0 && so
->so_zoneid
== myzoneid
) {
1764 ksp
->ks_ndata
= nactive
;
1765 ksp
->ks_data_size
= nactive
* sizeof (struct k_sockinfo
);
1771 sockfs_snapshot(kstat_t
*ksp
, void *buf
, int rw
)
1773 int ns
; /* # of sonodes we've copied */
1774 struct sonode
*so
; /* current sonode on socklist */
1775 struct k_sockinfo
*pksi
; /* where we put sockinfo data */
1776 t_uscalar_t sn_len
; /* soa_len */
1777 zoneid_t myzoneid
= (zoneid_t
)(uintptr_t)ksp
->ks_private
;
1780 ASSERT((zoneid_t
)(uintptr_t)ksp
->ks_private
== getzoneid());
1782 ksp
->ks_snaptime
= gethrtime();
1784 if (rw
== KSTAT_WRITE
) { /* bounce all writes */
1789 * for each sonode on the socklist, we massage the important
1790 * info into buf, in k_sockinfo format.
1792 pksi
= (struct k_sockinfo
*)buf
;
1794 for (so
= socklist
.sl_list
; so
!= NULL
; so
= SOTOTPI(so
)->sti_next_so
) {
1795 /* only stuff active sonodes and the same zone: */
1796 if (so
->so_count
== 0 || so
->so_zoneid
!= myzoneid
) {
1801 * If the sonode was activated between the update and the
1802 * snapshot, we're done - as this is only a snapshot.
1804 if ((caddr_t
)(pksi
) >= (caddr_t
)buf
+ ksp
->ks_data_size
) {
1809 /* copy important info into buf: */
1810 pksi
->ks_si
.si_size
= sizeof (struct k_sockinfo
);
1811 pksi
->ks_si
.si_family
= so
->so_family
;
1812 pksi
->ks_si
.si_type
= so
->so_type
;
1813 pksi
->ks_si
.si_flag
= so
->so_flag
;
1814 pksi
->ks_si
.si_state
= so
->so_state
;
1815 pksi
->ks_si
.si_serv_type
= sti
->sti_serv_type
;
1816 pksi
->ks_si
.si_ux_laddr_sou_magic
=
1817 sti
->sti_ux_laddr
.soua_magic
;
1818 pksi
->ks_si
.si_ux_faddr_sou_magic
=
1819 sti
->sti_ux_faddr
.soua_magic
;
1820 pksi
->ks_si
.si_laddr_soa_len
= sti
->sti_laddr
.soa_len
;
1821 pksi
->ks_si
.si_faddr_soa_len
= sti
->sti_faddr
.soa_len
;
1822 pksi
->ks_si
.si_szoneid
= so
->so_zoneid
;
1823 pksi
->ks_si
.si_faddr_noxlate
= sti
->sti_faddr_noxlate
;
1825 mutex_enter(&so
->so_lock
);
1827 if (sti
->sti_laddr_sa
!= NULL
) {
1828 ASSERT(sti
->sti_laddr_sa
->sa_data
!= NULL
);
1829 sn_len
= sti
->sti_laddr_len
;
1830 ASSERT(sn_len
<= sizeof (short) +
1831 sizeof (pksi
->ks_si
.si_laddr_sun_path
));
1833 pksi
->ks_si
.si_laddr_family
=
1834 sti
->sti_laddr_sa
->sa_family
;
1836 /* AF_UNIX socket names are NULL terminated */
1837 (void) strncpy(pksi
->ks_si
.si_laddr_sun_path
,
1838 sti
->sti_laddr_sa
->sa_data
,
1839 sizeof (pksi
->ks_si
.si_laddr_sun_path
));
1840 sn_len
= strlen(pksi
->ks_si
.si_laddr_sun_path
);
1842 pksi
->ks_si
.si_laddr_sun_path
[sn_len
] = 0;
1845 if (sti
->sti_faddr_sa
!= NULL
) {
1846 ASSERT(sti
->sti_faddr_sa
->sa_data
!= NULL
);
1847 sn_len
= sti
->sti_faddr_len
;
1848 ASSERT(sn_len
<= sizeof (short) +
1849 sizeof (pksi
->ks_si
.si_faddr_sun_path
));
1851 pksi
->ks_si
.si_faddr_family
=
1852 sti
->sti_faddr_sa
->sa_family
;
1854 (void) strncpy(pksi
->ks_si
.si_faddr_sun_path
,
1855 sti
->sti_faddr_sa
->sa_data
,
1856 sizeof (pksi
->ks_si
.si_faddr_sun_path
));
1857 sn_len
= strlen(pksi
->ks_si
.si_faddr_sun_path
);
1859 pksi
->ks_si
.si_faddr_sun_path
[sn_len
] = 0;
1862 mutex_exit(&so
->so_lock
);
1864 (void) sprintf(pksi
->ks_straddr
[0], "%p", (void *)so
);
1865 (void) sprintf(pksi
->ks_straddr
[1], "%p",
1866 (void *)sti
->sti_ux_laddr
.soua_vp
);
1867 (void) sprintf(pksi
->ks_straddr
[2], "%p",
1868 (void *)sti
->sti_ux_faddr
.soua_vp
);
1879 soreadfile(file_t
*fp
, uchar_t
*buf
, u_offset_t fileoff
, int *err
, size_t size
)
1882 struct iovec aiov
[MSG_MAXIOVLEN
];
1883 register vnode_t
*vp
;
1894 aiov
[0].iov_base
= (caddr_t
)buf
;
1895 aiov
[0].iov_len
= size
;
1897 cnt
= (ssize_t
)size
;
1898 (void) VOP_RWLOCK(vp
, rwflag
, NULL
);
1900 auio
.uio_loffset
= fileoff
;
1901 auio
.uio_iov
= aiov
;
1902 auio
.uio_iovcnt
= iovcnt
;
1903 auio
.uio_resid
= cnt
;
1904 auio
.uio_segflg
= UIO_SYSSPACE
;
1905 auio
.uio_llimit
= MAXOFFSET_T
;
1906 auio
.uio_fmode
= fflag
;
1907 auio
.uio_extflg
= UIO_COPY_CACHED
;
1909 ioflag
= auio
.uio_fmode
& (FAPPEND
|FSYNC
|FDSYNC
|FRSYNC
);
1911 /* If read sync is not asked for, filter sync flags */
1912 if ((ioflag
& FRSYNC
) == 0)
1913 ioflag
&= ~(FSYNC
|FDSYNC
);
1914 error
= VOP_READ(vp
, &auio
, ioflag
, fp
->f_cred
, NULL
);
1915 cnt
-= auio
.uio_resid
;
1917 VOP_RWUNLOCK(vp
, rwflag
, NULL
);
1919 if (error
== EINTR
&& cnt
!= 0)
/*
 * Copy size bytes from "from" to the kernel buffer "to".
 *
 * If fromkernel is non-zero the source is a kernel address and a plain
 * bcopy() is done, which cannot fail. Otherwise the copy goes through
 * xcopyin() and its result is returned.
 *
 * Returns 0 on success.
 */
int
so_copyin(const void *from, void *to, size_t size, int fromkernel)
{
	if (fromkernel) {
		bcopy(from, to, size);
		return (0);
	}
	return (xcopyin(from, to, size));
}
/*
 * Copy size bytes from the kernel buffer "from" to "to".
 *
 * If tokernel is non-zero the destination is a kernel address and a plain
 * bcopy() is done, which cannot fail. Otherwise the copy goes through
 * xcopyout() and its result is returned.
 *
 * Returns 0 on success.
 */
int
so_copyout(const void *from, void *to, size_t size, int tokernel)
{
	if (tokernel) {
		bcopy(from, to, size);
		return (0);
	}
	return (xcopyout(from, to, size));
}