4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
26 #include <sys/types.h>
27 #include <sys/param.h>
28 #include <sys/systm.h>
29 #include <sys/sysmacros.h>
30 #include <sys/debug.h>
31 #include <sys/cmn_err.h>
33 #include <sys/policy.h>
34 #include <sys/modctl.h>
36 #include <sys/sunddi.h>
38 #include <sys/strsun.h>
39 #include <sys/stropts.h>
40 #include <sys/strsubr.h>
41 #include <sys/socket.h>
42 #include <sys/socketvar.h>
45 #include <inet/ipclassifier.h>
46 #include <fs/sockfs/sockcommon.h>
47 #include <fs/sockfs/sockfilter_impl.h>
48 #include <fs/sockfs/nl7c.h>
49 #include <fs/sockfs/socktpi.h>
50 #include <fs/sockfs/sodirect.h>
/*
 * Integer tunables defined outside this file.
 * NOTE(review): judging by the names they control XNET argument checking
 * and its diagnostics -- confirm at the point of definition.
 */
extern int xnet_skip_checks;
extern int xnet_check_print;
extern int xnet_truncate_print;
/*
 * Common socket access functions.
 *
 * Instead of accessing the sonode switch directly (i.e., SOP_xxx()),
 * the socket_xxx() functions should be used.
 */
63 * Try to create a new sonode of the requested <family, type, protocol>.
67 socket_create(int family
, int type
, int protocol
, char *devpath
, char *mod
,
68 int flags
, int version
, struct cred
*cr
, int *errorp
)
71 struct sockparams
*sp
= NULL
;
75 * Look for a sockparams entry that match the given criteria.
76 * solookup() returns with the entry held.
78 *errorp
= solookup(family
, type
, protocol
, &sp
);
79 saved_error
= *errorp
;
81 int kmflags
= (flags
== SOCKET_SLEEP
) ? KM_SLEEP
: KM_NOSLEEP
;
83 * There is no matching sockparams entry. An ephemeral entry is
84 * created if the caller specifies a device or a socket module.
86 if (devpath
!= NULL
) {
88 sp
= sockparams_hold_ephemeral_bydev(family
, type
,
89 protocol
, devpath
, kmflags
, errorp
);
90 } else if (mod
!= NULL
) {
92 sp
= sockparams_hold_ephemeral_bymod(family
, type
,
93 protocol
, mod
, kmflags
, errorp
);
95 *errorp
= solookup(family
, type
, 0, &sp
);
99 if (saved_error
&& (*errorp
== EPROTONOSUPPORT
||
100 *errorp
== EPROTOTYPE
|| *errorp
== ENOPROTOOPT
))
101 *errorp
= saved_error
;
106 ASSERT(sp
->sp_smod_info
!= NULL
);
107 ASSERT(flags
== SOCKET_SLEEP
|| flags
== SOCKET_NOSLEEP
);
108 sp
->sp_stats
.sps_ncreate
.value
.ui64
++;
109 so
= sp
->sp_smod_info
->smod_sock_create_func(sp
, family
, type
,
110 protocol
, version
, flags
, errorp
, cr
);
112 SOCKPARAMS_DEC_REF(sp
);
114 if ((*errorp
= SOP_INIT(so
, NULL
, cr
, flags
)) == 0) {
115 /* Cannot fail, only bumps so_count */
116 (void) VOP_OPEN(&SOTOV(so
), FREAD
|FWRITE
, cr
, NULL
);
118 if (saved_error
&& (*errorp
== EPROTONOSUPPORT
||
119 *errorp
== EPROTOTYPE
|| *errorp
== ENOPROTOOPT
))
120 *errorp
= saved_error
;
129 socket_newconn(struct sonode
*parent
, sock_lower_handle_t lh
,
130 sock_downcalls_t
*dc
, int flags
, int *errorp
)
133 struct sockparams
*sp
;
136 if ((cr
= CRED()) == NULL
)
139 sp
= parent
->so_sockparams
;
142 sp
->sp_stats
.sps_ncreate
.value
.ui64
++;
143 so
= sp
->sp_smod_info
->smod_sock_create_func(sp
, parent
->so_family
,
144 parent
->so_type
, parent
->so_protocol
, parent
->so_version
, flags
,
147 SOCKPARAMS_INC_REF(sp
);
149 so
->so_proto_handle
= lh
;
150 so
->so_downcalls
= dc
;
152 * This function may be called in interrupt context, and CRED()
153 * will be NULL. In this case, pass in kcred.
155 if ((*errorp
= SOP_INIT(so
, parent
, cr
, flags
)) == 0) {
156 /* Cannot fail, only bumps so_count */
157 (void) VOP_OPEN(&SOTOV(so
), FREAD
|FWRITE
, cr
, NULL
);
168 * Bind local endpoint.
171 socket_bind(struct sonode
*so
, struct sockaddr
*name
, socklen_t namelen
,
172 int flags
, cred_t
*cr
)
174 return (SOP_BIND(so
, name
, namelen
, flags
, cr
));
178 * Turn socket into a listen socket.
181 socket_listen(struct sonode
*so
, int backlog
, cred_t
*cr
)
188 * Use the same qlimit as in BSD. BSD checks the qlimit
189 * before queuing the next connection implying that a
190 * listen(sock, 0) allows one connection to be queued.
191 * BSD also uses 1.5 times the requested backlog.
193 * XNS Issue 4 required a strict interpretation of the backlog.
194 * This has been waived subsequently for Issue 4 and the change
195 * incorporated in XNS Issue 5. So we aren't required to do
196 * anything special for XPG apps.
198 if (backlog
>= (INT_MAX
- 1) / 3)
201 backlog
= backlog
* 3 / 2 + 1;
203 return (SOP_LISTEN(so
, backlog
, cr
));
207 * Accept incoming connection.
210 socket_accept(struct sonode
*lso
, int fflag
, cred_t
*cr
, struct sonode
**nsop
)
212 return (SOP_ACCEPT(lso
, fflag
, cr
, nsop
));
219 socket_connect(struct sonode
*so
, struct sockaddr
*name
,
220 socklen_t namelen
, int fflag
, int flags
, cred_t
*cr
)
225 * Handle a connect to a name parameter of type AF_UNSPEC like a
226 * connect to a null address. This is the portable method to
227 * unconnect a socket.
229 if ((namelen
>= sizeof (sa_family_t
)) &&
230 (name
->sa_family
== AF_UNSPEC
)) {
235 error
= SOP_CONNECT(so
, name
, namelen
, fflag
, flags
, cr
);
237 if (error
== EHOSTUNREACH
&& flags
& _SOCONNECT_XPG4_2
) {
239 * X/Open specification contains a requirement that
240 * ENETUNREACH be returned but does not require
241 * EHOSTUNREACH. In order to keep the test suite
242 * happy we mess with the errno here.
251 * Get address of remote node.
254 socket_getpeername(struct sonode
*so
, struct sockaddr
*addr
,
255 socklen_t
*addrlen
, boolean_t accept
, cred_t
*cr
)
257 ASSERT(*addrlen
> 0);
258 return (SOP_GETPEERNAME(so
, addr
, addrlen
, accept
, cr
));
266 socket_getsockname(struct sonode
*so
, struct sockaddr
*addr
,
267 socklen_t
*addrlen
, cred_t
*cr
)
269 return (SOP_GETSOCKNAME(so
, addr
, addrlen
, cr
));
274 * Called from shutdown().
277 socket_shutdown(struct sonode
*so
, int how
, cred_t
*cr
)
279 return (SOP_SHUTDOWN(so
, how
, cr
));
283 * Get socket options.
287 socket_getsockopt(struct sonode
*so
, int level
, int option_name
,
288 void *optval
, socklen_t
*optlenp
, int flags
, cred_t
*cr
)
290 return (SOP_GETSOCKOPT(so
, level
, option_name
, optval
,
291 optlenp
, flags
, cr
));
298 socket_setsockopt(struct sonode
*so
, int level
, int option_name
,
299 const void *optval
, t_uscalar_t optlen
, cred_t
*cr
)
302 /* Caller allocates aligned optval, or passes null */
303 ASSERT(((uintptr_t)optval
& (sizeof (t_scalar_t
) - 1)) == 0);
304 /* If optval is null optlen is 0, and vice-versa */
305 ASSERT(optval
!= NULL
|| optlen
== 0);
306 ASSERT(optlen
!= 0 || optval
== NULL
);
308 if (optval
== NULL
&& optlen
== 0)
311 return (SOP_SETSOCKOPT(so
, level
, option_name
, optval
, optlen
, cr
));
315 socket_sendmsg(struct sonode
*so
, struct nmsghdr
*msg
, struct uio
*uiop
,
319 ssize_t orig_resid
= uiop
->uio_resid
;
322 * Do not bypass the cache if we are doing a local (AF_UNIX) write.
324 if (so
->so_family
== AF_UNIX
)
325 uiop
->uio_extflg
|= UIO_COPY_CACHED
;
327 uiop
->uio_extflg
&= ~UIO_COPY_CACHED
;
329 error
= SOP_SENDMSG(so
, msg
, uiop
, cr
);
335 /* EAGAIN is EWOULDBLOCK */
337 /* We did a partial send */
338 if (uiop
->uio_resid
!= orig_resid
)
342 if ((so
->so_mode
& SM_KERNEL
) == 0)
343 tsignal(curthread
, SIGPIPE
);
351 socket_sendmblk(struct sonode
*so
, struct nmsghdr
*msg
, int fflag
,
352 struct cred
*cr
, mblk_t
**mpp
)
356 error
= SOP_SENDMBLK(so
, msg
, fflag
, cr
, mpp
);
357 if (error
== EPIPE
) {
358 tsignal(curthread
, SIGPIPE
);
364 socket_recvmsg(struct sonode
*so
, struct nmsghdr
*msg
, struct uio
*uiop
,
368 ssize_t orig_resid
= uiop
->uio_resid
;
371 * Do not bypass the cache when reading data, as the application
372 * is likely to access the data shortly.
374 uiop
->uio_extflg
|= UIO_COPY_CACHED
;
376 error
= SOP_RECVMSG(so
, msg
, uiop
, cr
);
380 /* EAGAIN is EWOULDBLOCK */
382 /* We did a partial read */
383 if (uiop
->uio_resid
!= orig_resid
)
/*
 * Issue an ioctl on the socket.
 *
 * Forwards to SOP_IOCTL() and returns its result unchanged; rvalp is
 * passed through to the operation.
 */
int
socket_ioctl(struct sonode *so, int cmd, intptr_t arg, int mode,
    struct cred *cr, int32_t *rvalp)
{
	int error;

	error = SOP_IOCTL(so, cmd, arg, mode, cr, rvalp);
	return (error);
}
/*
 * Poll the socket.
 *
 * Forwards to SOP_POLL() and returns its result unchanged; reventsp and
 * phpp are passed through to the operation.
 */
int
socket_poll(struct sonode *so, short events, int anyyet, short *reventsp,
    struct pollhead **phpp)
{
	int error;

	error = SOP_POLL(so, events, anyyet, reventsp, phpp);
	return (error);
}
/*
 * Close the socket through its vnode.
 *
 * Calls VOP_CLOSE() on the socket's vnode with a count of 1 and an
 * offset of 0, and returns its result.
 */
int
socket_close(struct sonode *so, int flag, struct cred *cr)
{
	int error;

	error = VOP_CLOSE(SOTOV(so), flag, 1, 0, cr, NULL);
	return (error);
}
413 socket_close_internal(struct sonode
*so
, int flag
, cred_t
*cr
)
415 ASSERT(so
->so_count
== 0);
417 return (SOP_CLOSE(so
, flag
, cr
));
/*
 * Destroy a socket: invalidate its vnode and drop the hold on it.
 *
 * NOTE(review): the VN_RELE() was restored from a one-line gap in the
 * listing this block was rebuilt from; confirm against the canonical file.
 */
void
socket_destroy(struct sonode *so)
{
	vn_invalid(SOTOV(so));
	VN_RELE(SOTOV(so));
}
429 socket_destroy_internal(struct sonode
*so
, cred_t
*cr
)
431 struct sockparams
*sp
= so
->so_sockparams
;
432 ASSERT(so
->so_count
== 0 && sp
!= NULL
);
434 sp
->sp_smod_info
->smod_sock_destroy_func(so
);
436 SOCKPARAMS_DEC_REF(sp
);
440 * TODO Once the common vnode ops is available, then the vnops argument
445 sonode_constructor(void *buf
, void *cdrarg
, int kmflags
)
447 struct sonode
*so
= buf
;
450 vp
= so
->so_vnode
= vn_alloc(kmflags
);
455 vn_setops(vp
, socket_vnodeops
);
458 so
->so_oobmsg
= NULL
;
460 so
->so_proto_handle
= NULL
;
462 so
->so_peercred
= NULL
;
464 so
->so_rcv_queued
= 0;
465 so
->so_rcv_q_head
= NULL
;
466 so
->so_rcv_q_last_head
= NULL
;
467 so
->so_rcv_head
= NULL
;
468 so
->so_rcv_last_head
= NULL
;
469 so
->so_rcv_wanted
= 0;
470 so
->so_rcv_timer_interval
= SOCKET_NO_RCVTIMER
;
471 so
->so_rcv_timer_tid
= 0;
472 so
->so_rcv_thresh
= 0;
474 list_create(&so
->so_acceptq_list
, sizeof (struct sonode
),
475 offsetof(struct sonode
, so_acceptq_node
));
476 list_create(&so
->so_acceptq_defer
, sizeof (struct sonode
),
477 offsetof(struct sonode
, so_acceptq_node
));
478 list_link_init(&so
->so_acceptq_node
);
479 so
->so_acceptq_len
= 0;
481 so
->so_listener
= NULL
;
483 so
->so_snd_qfull
= B_FALSE
;
485 so
->so_filter_active
= 0;
486 so
->so_filter_tx
= 0;
487 so
->so_filter_defertime
= 0;
488 so
->so_filter_top
= NULL
;
489 so
->so_filter_bottom
= NULL
;
491 mutex_init(&so
->so_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
492 mutex_init(&so
->so_acceptq_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
493 rw_init(&so
->so_fallback_rwlock
, NULL
, RW_DEFAULT
, NULL
);
494 cv_init(&so
->so_state_cv
, NULL
, CV_DEFAULT
, NULL
);
495 cv_init(&so
->so_single_cv
, NULL
, CV_DEFAULT
, NULL
);
496 cv_init(&so
->so_read_cv
, NULL
, CV_DEFAULT
, NULL
);
498 cv_init(&so
->so_acceptq_cv
, NULL
, CV_DEFAULT
, NULL
);
499 cv_init(&so
->so_snd_cv
, NULL
, CV_DEFAULT
, NULL
);
500 cv_init(&so
->so_rcv_cv
, NULL
, CV_DEFAULT
, NULL
);
501 cv_init(&so
->so_copy_cv
, NULL
, CV_DEFAULT
, NULL
);
502 cv_init(&so
->so_closing_cv
, NULL
, CV_DEFAULT
, NULL
);
509 sonode_destructor(void *buf
, void *cdrarg
)
511 struct sonode
*so
= buf
;
512 struct vnode
*vp
= SOTOV(so
);
514 ASSERT(so
->so_priv
== NULL
);
515 ASSERT(so
->so_peercred
== NULL
);
517 ASSERT(so
->so_oobmsg
== NULL
);
519 ASSERT(so
->so_rcv_q_head
== NULL
);
521 list_destroy(&so
->so_acceptq_list
);
522 list_destroy(&so
->so_acceptq_defer
);
523 ASSERT(!list_link_active(&so
->so_acceptq_node
));
524 ASSERT(so
->so_listener
== NULL
);
526 ASSERT(so
->so_filter_active
== 0);
527 ASSERT(so
->so_filter_tx
== 0);
528 ASSERT(so
->so_filter_top
== NULL
);
529 ASSERT(so
->so_filter_bottom
== NULL
);
531 ASSERT(vp
->v_data
== so
);
532 ASSERT(vn_matchops(vp
, socket_vnodeops
));
536 mutex_destroy(&so
->so_lock
);
537 mutex_destroy(&so
->so_acceptq_lock
);
538 rw_destroy(&so
->so_fallback_rwlock
);
540 cv_destroy(&so
->so_state_cv
);
541 cv_destroy(&so
->so_single_cv
);
542 cv_destroy(&so
->so_read_cv
);
543 cv_destroy(&so
->so_acceptq_cv
);
544 cv_destroy(&so
->so_snd_cv
);
545 cv_destroy(&so
->so_rcv_cv
);
546 cv_destroy(&so
->so_closing_cv
);
550 sonode_init(struct sonode
*so
, struct sockparams
*sp
, int family
,
551 int type
, int protocol
, sonodeops_t
*sops
)
564 so
->so_family
= family
;
566 so
->so_protocol
= protocol
;
568 SOCK_CONNID_INIT(so
->so_proto_connid
);
571 so
->so_linger
.l_onoff
= 0;
572 so
->so_linger
.l_linger
= 0;
577 so
->so_xpg_rcvbuf
= 0;
579 ASSERT(so
->so_oobmsg
== NULL
);
583 ASSERT(so
->so_peercred
== NULL
);
585 so
->so_zoneid
= getzoneid();
587 so
->so_sockparams
= sp
;
591 so
->so_not_str
= (sops
!= &sotpi_sonodeops
);
593 so
->so_proto_handle
= NULL
;
595 so
->so_downcalls
= NULL
;
600 vp
->v_vfsp
= rootvfs
;
602 vp
->v_rdev
= sockdev
;
604 so
->so_snd_qfull
= B_FALSE
;
607 so
->so_rcv_wakeup
= B_FALSE
;
608 so
->so_snd_wakeup
= B_FALSE
;
609 so
->so_flowctrld
= B_FALSE
;
612 bzero(&so
->so_poll_list
, sizeof (so
->so_poll_list
));
613 bzero(&so
->so_proto_props
, sizeof (struct sock_proto_props
));
615 bzero(&(so
->so_ksock_callbacks
), sizeof (ksocket_callbacks_t
));
616 so
->so_ksock_cb_arg
= NULL
;
618 so
->so_max_addr_len
= sizeof (struct sockaddr_storage
);
620 so
->so_direct
= NULL
;
626 sonode_fini(struct sonode
*so
)
630 ASSERT(so
->so_count
== 0);
632 if (so
->so_rcv_timer_tid
) {
633 ASSERT(MUTEX_NOT_HELD(&so
->so_lock
));
634 (void) untimeout(so
->so_rcv_timer_tid
);
635 so
->so_rcv_timer_tid
= 0;
638 if (so
->so_poll_list
.ph_list
!= NULL
) {
639 pollwakeup(&so
->so_poll_list
, POLLERR
);
640 pollhead_clean(&so
->so_poll_list
);
643 if (so
->so_direct
!= NULL
)
649 if (so
->so_peercred
!= NULL
) {
650 crfree(so
->so_peercred
);
651 so
->so_peercred
= NULL
;
653 /* Detach and destroy filters */
654 if (so
->so_filter_top
!= NULL
)
655 sof_sonode_cleanup(so
);
657 ASSERT(list_is_empty(&so
->so_acceptq_list
));
658 ASSERT(list_is_empty(&so
->so_acceptq_defer
));
659 ASSERT(!list_link_active(&so
->so_acceptq_node
));
661 ASSERT(so
->so_rcv_queued
== 0);
662 ASSERT(so
->so_rcv_q_head
== NULL
);
663 ASSERT(so
->so_rcv_q_last_head
== NULL
);
664 ASSERT(so
->so_rcv_head
== NULL
);
665 ASSERT(so
->so_rcv_last_head
== NULL
);