server: Don't set SO_RCVBUF below Windows default value on Unix socket.
[wine.git] / server / sock.c
blobac55a5448f7d862403dac7eb7f0c4434c736becc
1 /*
2 * Server-side socket management
4 * Copyright (C) 1999 Marcus Meissner, Ove Kåven
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
20 * FIXME: we use read|write access in all cases. Shouldn't we depend that
21 * on the access of the current handle?
24 #include "config.h"
26 #include <assert.h>
27 #include <fcntl.h>
28 #include <stdarg.h>
29 #include <stdio.h>
30 #include <string.h>
31 #include <stdlib.h>
32 #include <errno.h>
33 #ifdef HAVE_IFADDRS_H
34 # include <ifaddrs.h>
35 #endif
36 #ifdef HAVE_NET_IF_H
37 # include <net/if.h>
38 #endif
39 #ifdef HAVE_NETINET_IN_H
40 # include <netinet/in.h>
41 #endif
42 #ifdef HAVE_NETINET_TCP_H
43 # include <netinet/tcp.h>
44 #endif
45 #include <poll.h>
46 #include <sys/time.h>
47 #include <sys/types.h>
48 #include <sys/socket.h>
49 #include <sys/ioctl.h>
50 #ifdef HAVE_SYS_FILIO_H
51 # include <sys/filio.h>
52 #endif
53 #include <time.h>
54 #include <unistd.h>
55 #include <limits.h>
56 #ifdef HAVE_LINUX_FILTER_H
57 # include <linux/filter.h>
58 #endif
59 #ifdef HAVE_LINUX_RTNETLINK_H
60 # include <linux/rtnetlink.h>
61 #endif
63 #ifdef HAVE_NETIPX_IPX_H
64 # include <netipx/ipx.h>
65 # define HAS_IPX
66 #elif defined(HAVE_LINUX_IPX_H)
67 # ifdef HAVE_ASM_TYPES_H
68 # include <asm/types.h>
69 # endif
70 # ifdef HAVE_LINUX_TYPES_H
71 # include <linux/types.h>
72 # endif
73 # include <linux/ipx.h>
74 # ifdef SOL_IPX
75 # define HAS_IPX
76 # endif
77 #endif
79 #ifdef HAVE_LINUX_IRDA_H
80 # ifdef HAVE_LINUX_TYPES_H
81 # include <linux/types.h>
82 # endif
83 # include <linux/irda.h>
84 # define HAS_IRDA
85 #endif
87 #include "ntstatus.h"
88 #define WIN32_NO_STATUS
89 #include "windef.h"
90 #include "winternl.h"
91 #include "winerror.h"
92 #define USE_WS_PREFIX
93 #include "winsock2.h"
94 #include "ws2tcpip.h"
95 #include "wsipx.h"
96 #include "af_irda.h"
97 #include "wine/afd.h"
98 #include "wine/rbtree.h"
100 #include "process.h"
101 #include "file.h"
102 #include "handle.h"
103 #include "thread.h"
104 #include "request.h"
105 #include "user.h"
107 #if defined(linux) && !defined(IP_UNICAST_IF)
108 #define IP_UNICAST_IF 50
109 #endif
/* The four octets 127.12.34.56, stored one byte per octet. */
static const char magic_loopback_addr[] = {127, 12, 34, 56};
113 union win_sockaddr
115 struct WS_sockaddr addr;
116 struct WS_sockaddr_in in;
117 struct WS_sockaddr_in6 in6;
118 struct WS_sockaddr_ipx ipx;
119 SOCKADDR_IRDA irda;
/* Storage for a host (Unix) socket address.  The IPX and IrDA views exist
 * only when the matching host headers were found at build time. */
union unix_sockaddr
{
    struct sockaddr      addr;   /* family-independent view */
    struct sockaddr_in   in;     /* AF_INET */
    struct sockaddr_in6  in6;    /* AF_INET6 */
#ifdef HAS_IPX
    struct sockaddr_ipx  ipx;    /* AF_IPX */
#endif
#ifdef HAS_IRDA
    struct sockaddr_irda irda;   /* AF_IRDA */
#endif
};
135 static struct list poll_list = LIST_INIT( poll_list );
137 struct poll_req
139 struct list entry;
140 struct async *async;
141 struct iosb *iosb;
142 struct timeout_user *timeout;
143 timeout_t orig_timeout;
144 int exclusive;
145 int pending;
146 unsigned int count;
147 struct
149 struct sock *sock;
150 int mask;
151 obj_handle_t handle;
152 int flags;
153 unsigned int status;
154 } sockets[1];
157 struct accept_req
159 struct list entry;
160 struct async *async;
161 struct iosb *iosb;
162 struct sock *sock, *acceptsock;
163 int accepted;
164 unsigned int recv_len, local_len;
/* State of one asynchronous connect request, optionally followed by a send
 * of data taken from the request's input buffer. */
struct connect_req
{
    struct async *async;        /* async backing the request */
    struct iosb  *iosb;         /* I/O status block; in_data holds the payload */
    struct sock  *sock;         /* socket being connected */
    unsigned int  addr_len;     /* length of the address in the input buffer */
    unsigned int  send_len;     /* total number of bytes to send after connecting */
    unsigned int  send_cursor;  /* number of those bytes already sent */
};
/* Pairs an I/O status block with the socket it belongs to. */
struct send_req
{
    struct iosb *iosb;
    struct sock *sock;
};
/* Connection state tracked for each socket. */
enum connection_state
{
    SOCK_LISTENING,
    SOCK_UNCONNECTED,
    SOCK_CONNECTING,
    SOCK_CONNECTED,
    SOCK_CONNECTIONLESS,
};
190 struct bound_addr
192 struct rb_entry entry;
193 union unix_sockaddr addr;
194 int match_any_addr;
195 int reuse_count;
198 #define MAX_ICMP_HISTORY_LENGTH 8
200 #define MIN_RCVBUF 65536
202 struct sock
204 struct object obj; /* object header */
205 struct fd *fd; /* socket file descriptor */
206 enum connection_state state; /* connection state */
207 unsigned int mask; /* event mask */
208 /* pending AFD_POLL_* events which have not yet been reported to the application */
209 unsigned int pending_events;
210 /* AFD_POLL_* events which have already been reported and should not be
211 * selected for again until reset by a relevant call.
213 * For example, if AFD_POLL_READ is set here and not in pending_events, it
214 * has already been reported and consumed, and we should not report it
215 * again, even if POLLIN is signaled, until it is reset by e.g recv().
217 * If an event has been signaled and not consumed yet, it will be set in
218 * both pending_events and reported_events (as we should only ever report
219 * any event once until it is reset.) */
220 unsigned int reported_events;
221 unsigned short proto; /* socket protocol */
222 unsigned short type; /* socket type */
223 unsigned short family; /* socket family */
224 struct event *event; /* event object */
225 user_handle_t window; /* window to send the message to */
226 unsigned int message; /* message to send */
227 obj_handle_t wparam; /* message wparam (socket handle) */
228 int errors[AFD_POLL_BIT_COUNT]; /* event errors */
229 timeout_t connect_time;/* time the socket was connected */
230 struct sock *deferred; /* socket that waits for a deferred accept */
231 struct async_queue read_q; /* queue for asynchronous reads */
232 struct async_queue write_q; /* queue for asynchronous writes */
233 struct async_queue ifchange_q; /* queue for interface change notifications */
234 struct async_queue accept_q; /* queue for asynchronous accepts */
235 struct async_queue connect_q; /* queue for asynchronous connects */
236 struct async_queue poll_q; /* queue for asynchronous polls */
237 struct object *ifchange_obj; /* the interface change notification object */
238 struct list ifchange_entry; /* entry in ifchange notification list */
239 struct list accept_list; /* list of pending accept requests */
240 struct accept_req *accept_recv_req; /* pending accept-into request which will recv on this socket */
241 struct connect_req *connect_req; /* pending connection request */
242 struct poll_req *main_poll; /* main poll */
243 union win_sockaddr addr; /* socket name */
244 int addr_len; /* socket name length */
245 unsigned int rcvbuf; /* advisory recv buffer size */
246 unsigned int sndbuf; /* advisory send buffer size */
247 unsigned int rcvtimeo; /* receive timeout in ms */
248 unsigned int sndtimeo; /* send timeout in ms */
249 struct
251 unsigned short icmp_id;
252 unsigned short icmp_seq;
254 icmp_fixup_data[MAX_ICMP_HISTORY_LENGTH]; /* Sent ICMP packets history used to fixup reply id. */
255 struct bound_addr *bound_addr[2]; /* Links to the entries in bound addresses tree. */
256 unsigned int icmp_fixup_data_len; /* Sent ICMP packets history length. */
257 unsigned int rd_shutdown : 1; /* is the read end shut down? */
258 unsigned int wr_shutdown : 1; /* is the write end shut down? */
259 unsigned int wr_shutdown_pending : 1; /* is a write shutdown pending? */
260 unsigned int hangup : 1; /* has the read end received a hangup? */
261 unsigned int aborted : 1; /* did we get a POLLERR or irregular POLLHUP? */
262 unsigned int nonblocking : 1; /* is the socket nonblocking? */
263 unsigned int bound : 1; /* is the socket bound? */
264 unsigned int reset : 1; /* did we get a TCP reset? */
265 unsigned int reuseaddr : 1; /* winsock SO_REUSEADDR option value */
266 unsigned int exclusiveaddruse : 1; /* winsock SO_EXCLUSIVEADDRUSE option value */
269 static int is_tcp_socket( struct sock *sock )
271 return sock->type == WS_SOCK_STREAM && (sock->family == WS_AF_INET || sock->family == WS_AF_INET6);
274 static int addr_compare( const void *key, const struct wine_rb_entry *entry )
276 const struct bound_addr *bound_addr = RB_ENTRY_VALUE(entry, struct bound_addr, entry);
277 const struct bound_addr *addr = key;
279 if (addr->addr.addr.sa_family != bound_addr->addr.addr.sa_family)
280 return addr->addr.addr.sa_family < bound_addr->addr.addr.sa_family ? -1 : 1;
282 if (addr->addr.addr.sa_family == AF_INET)
284 if (addr->addr.in.sin_port != bound_addr->addr.in.sin_port)
285 return addr->addr.in.sin_port < bound_addr->addr.in.sin_port ? -1 : 1;
286 if (bound_addr->match_any_addr || addr->match_any_addr
287 || addr->addr.in.sin_addr.s_addr == bound_addr->addr.in.sin_addr.s_addr)
288 return 0;
289 return addr->addr.in.sin_addr.s_addr < bound_addr->addr.in.sin_addr.s_addr ? -1 : 1;
292 assert( addr->addr.addr.sa_family == AF_INET6 );
293 if (addr->addr.in6.sin6_port != bound_addr->addr.in6.sin6_port)
294 return addr->addr.in6.sin6_port < bound_addr->addr.in6.sin6_port ? -1 : 1;
295 if (bound_addr->match_any_addr || addr->match_any_addr) return 0;
296 return memcmp( &addr->addr.in6.sin6_addr, &bound_addr->addr.in6.sin6_addr, sizeof(addr->addr.in6.sin6_addr) );
299 static int ipv4addr_from_v6( union unix_sockaddr *v4addr, const struct sockaddr_in6 *in6, int map_unspecified )
301 v4addr->in.sin_family = AF_INET;
302 v4addr->in.sin_port = in6->sin6_port;
304 if (map_unspecified && IN6_IS_ADDR_UNSPECIFIED(&in6->sin6_addr))
306 v4addr->in.sin_addr.s_addr = htonl( INADDR_ANY );
307 return 1;
309 if (IN6_IS_ADDR_V4COMPAT(&in6->sin6_addr) || IN6_IS_ADDR_V4MAPPED(&in6->sin6_addr))
311 memcpy( &v4addr->in.sin_addr.s_addr, &in6->sin6_addr.s6_addr[12], sizeof(v4addr->in.sin_addr.s_addr) );
312 return 1;
314 return 0;
317 static struct rb_tree bound_addresses_tree = { addr_compare };
319 static int should_track_conflicts_for_addr( struct sock *sock, const union unix_sockaddr *addr )
321 if (!is_tcp_socket( sock )) return 0;
323 if (sock->family == WS_AF_INET && addr->addr.sa_family == AF_INET && addr->in.sin_port)
324 return 1;
325 else if (sock->family == WS_AF_INET6 && addr->addr.sa_family == AF_INET6 && addr->in6.sin6_port)
326 return 1;
328 return 0;
331 static int is_any_addr( const union unix_sockaddr *addr )
333 if (addr->addr.sa_family == AF_INET && addr->in.sin_addr.s_addr == htonl( INADDR_ANY ))
334 return 1;
335 if (addr->addr.sa_family == AF_INET6 && IN6_IS_ADDR_UNSPECIFIED(&addr->in6.sin6_addr))
336 return 1;
337 return 0;
340 static int check_addr_usage( struct sock *sock, const union unix_sockaddr *addr, int v6only )
342 struct bound_addr *bound_addr, search_addr;
343 struct rb_entry *entry;
345 if (!should_track_conflicts_for_addr( sock, addr )) return 0;
347 search_addr.addr = *addr;
348 search_addr.match_any_addr = sock->exclusiveaddruse && is_any_addr( addr );
350 if ((entry = rb_get( &bound_addresses_tree, &search_addr )))
352 bound_addr = WINE_RB_ENTRY_VALUE(entry, struct bound_addr, entry);
353 if (bound_addr->reuse_count == -1 || !sock->reuseaddr)
355 set_error( sock->reuseaddr || bound_addr->match_any_addr
356 ? STATUS_ACCESS_DENIED : STATUS_SHARING_VIOLATION );
357 return 1;
361 if (sock->family != WS_AF_INET6 || v6only) return 0;
362 if (!ipv4addr_from_v6( &search_addr.addr, &addr->in6, sock->exclusiveaddruse )) return 0;
364 search_addr.match_any_addr = sock->exclusiveaddruse && is_any_addr( &search_addr.addr );
365 if ((entry = rb_get( &bound_addresses_tree, &search_addr )))
367 bound_addr = WINE_RB_ENTRY_VALUE(entry, struct bound_addr, entry);
368 if (bound_addr->reuse_count == -1 || !sock->reuseaddr)
370 set_error( sock->reuseaddr || bound_addr->match_any_addr
371 ? STATUS_ACCESS_DENIED : STATUS_SHARING_VIOLATION );
372 return 1;
375 return 0;
378 static struct bound_addr *register_bound_address( struct sock *sock, const union unix_sockaddr *addr )
380 struct bound_addr *bound_addr, *temp;
382 if (!(bound_addr = mem_alloc( sizeof(*bound_addr) )))
383 return NULL;
385 bound_addr->addr = *addr;
386 bound_addr->match_any_addr = sock->exclusiveaddruse && is_any_addr( addr );
388 if (rb_put( &bound_addresses_tree, bound_addr, &bound_addr->entry ))
390 temp = bound_addr;
391 bound_addr = WINE_RB_ENTRY_VALUE(rb_get( &bound_addresses_tree, temp ), struct bound_addr, entry);
392 free( temp );
393 if (bound_addr->reuse_count == -1)
395 if (debug_level)
396 fprintf( stderr, "register_bound_address: address being updated is already exclusively bound\n" );
397 return NULL;
399 ++bound_addr->reuse_count;
401 else
403 bound_addr->reuse_count = sock->reuseaddr ? 1 : -1;
405 return bound_addr;
408 static void update_addr_usage( struct sock *sock, const union unix_sockaddr *addr, int v6only )
410 union unix_sockaddr v4addr;
412 assert( !sock->bound_addr[0] && !sock->bound_addr[1] );
414 if (!should_track_conflicts_for_addr( sock, addr )) return;
416 sock->bound_addr[0] = register_bound_address( sock, addr );
418 if (sock->family != WS_AF_INET6 || v6only) return;
420 if (!ipv4addr_from_v6( &v4addr, &addr->in6, sock->exclusiveaddruse )) return;
422 sock->bound_addr[1] = register_bound_address( sock, &v4addr );
425 static void sock_dump( struct object *obj, int verbose );
426 static struct fd *sock_get_fd( struct object *obj );
427 static int sock_close_handle( struct object *obj, struct process *process, obj_handle_t handle );
428 static void sock_destroy( struct object *obj );
429 static struct object *sock_get_ifchange( struct sock *sock );
430 static void sock_release_ifchange( struct sock *sock );
432 static int sock_get_poll_events( struct fd *fd );
433 static void sock_poll_event( struct fd *fd, int event );
434 static enum server_fd_type sock_get_fd_type( struct fd *fd );
435 static void sock_ioctl( struct fd *fd, ioctl_code_t code, struct async *async );
436 static void sock_cancel_async( struct fd *fd, struct async *async );
437 static void sock_reselect_async( struct fd *fd, struct async_queue *queue );
439 static int accept_into_socket( struct sock *sock, struct sock *acceptsock );
440 static struct sock *accept_socket( struct sock *sock );
441 static int sock_get_ntstatus( int err );
442 static unsigned int sock_get_error( int err );
443 static void poll_socket( struct sock *poll_sock, struct async *async, int exclusive, timeout_t timeout,
444 unsigned int count, const struct afd_poll_socket_64 *sockets );
446 static const struct object_ops sock_ops =
448 sizeof(struct sock), /* size */
449 &file_type, /* type */
450 sock_dump, /* dump */
451 add_queue, /* add_queue */
452 remove_queue, /* remove_queue */
453 default_fd_signaled, /* signaled */
454 no_satisfied, /* satisfied */
455 no_signal, /* signal */
456 sock_get_fd, /* get_fd */
457 default_map_access, /* map_access */
458 default_get_sd, /* get_sd */
459 default_set_sd, /* set_sd */
460 no_get_full_name, /* get_full_name */
461 no_lookup_name, /* lookup_name */
462 no_link_name, /* link_name */
463 NULL, /* unlink_name */
464 no_open_file, /* open_file */
465 no_kernel_obj_list, /* get_kernel_obj_list */
466 sock_close_handle, /* close_handle */
467 sock_destroy /* destroy */
470 static const struct fd_ops sock_fd_ops =
472 sock_get_poll_events, /* get_poll_events */
473 sock_poll_event, /* poll_event */
474 sock_get_fd_type, /* get_fd_type */
475 no_fd_read, /* read */
476 no_fd_write, /* write */
477 no_fd_flush, /* flush */
478 default_fd_get_file_info, /* get_file_info */
479 no_fd_get_volume_info, /* get_volume_info */
480 sock_ioctl, /* ioctl */
481 sock_cancel_async, /* cancel_async */
482 no_fd_queue_async, /* queue_async */
483 sock_reselect_async /* reselect_async */
486 static int sockaddr_from_unix( const union unix_sockaddr *uaddr, struct WS_sockaddr *wsaddr, socklen_t wsaddrlen )
488 memset( wsaddr, 0, wsaddrlen );
490 switch (uaddr->addr.sa_family)
492 case AF_INET:
494 struct WS_sockaddr_in win = {0};
496 if (wsaddrlen < sizeof(win)) return -1;
497 win.sin_family = WS_AF_INET;
498 win.sin_port = uaddr->in.sin_port;
499 memcpy( &win.sin_addr, &uaddr->in.sin_addr, sizeof(win.sin_addr) );
500 memcpy( wsaddr, &win, sizeof(win) );
501 return sizeof(win);
504 case AF_INET6:
506 struct WS_sockaddr_in6 win = {0};
508 if (wsaddrlen < sizeof(win)) return -1;
509 win.sin6_family = WS_AF_INET6;
510 win.sin6_port = uaddr->in6.sin6_port;
511 win.sin6_flowinfo = uaddr->in6.sin6_flowinfo;
512 memcpy( &win.sin6_addr, &uaddr->in6.sin6_addr, sizeof(win.sin6_addr) );
513 #ifdef HAVE_STRUCT_SOCKADDR_IN6_SIN6_SCOPE_ID
514 win.sin6_scope_id = uaddr->in6.sin6_scope_id;
515 #endif
516 memcpy( wsaddr, &win, sizeof(win) );
517 return sizeof(win);
520 #ifdef HAS_IPX
521 case AF_IPX:
523 struct WS_sockaddr_ipx win = {0};
525 if (wsaddrlen < sizeof(win)) return -1;
526 win.sa_family = WS_AF_IPX;
527 memcpy( win.sa_netnum, &uaddr->ipx.sipx_network, sizeof(win.sa_netnum) );
528 memcpy( win.sa_nodenum, &uaddr->ipx.sipx_node, sizeof(win.sa_nodenum) );
529 win.sa_socket = uaddr->ipx.sipx_port;
530 memcpy( wsaddr, &win, sizeof(win) );
531 return sizeof(win);
533 #endif
535 #ifdef HAS_IRDA
536 case AF_IRDA:
538 SOCKADDR_IRDA win;
540 if (wsaddrlen < sizeof(win)) return -1;
541 win.irdaAddressFamily = WS_AF_IRDA;
542 memcpy( win.irdaDeviceID, &uaddr->irda.sir_addr, sizeof(win.irdaDeviceID) );
543 if (uaddr->irda.sir_lsap_sel != LSAP_ANY)
544 snprintf( win.irdaServiceName, sizeof(win.irdaServiceName), "LSAP-SEL%u", uaddr->irda.sir_lsap_sel );
545 else
546 memcpy( win.irdaServiceName, uaddr->irda.sir_name, sizeof(win.irdaServiceName) );
547 memcpy( wsaddr, &win, sizeof(win) );
548 return sizeof(win);
550 #endif
552 case AF_UNSPEC:
553 return 0;
555 default:
556 return -1;
561 static socklen_t sockaddr_to_unix( const struct WS_sockaddr *wsaddr, int wsaddrlen, union unix_sockaddr *uaddr )
563 memset( uaddr, 0, sizeof(*uaddr) );
565 switch (wsaddr->sa_family)
567 case WS_AF_INET:
569 struct WS_sockaddr_in win = {0};
571 if (wsaddrlen < sizeof(win)) return 0;
572 memcpy( &win, wsaddr, sizeof(win) );
573 uaddr->in.sin_family = AF_INET;
574 uaddr->in.sin_port = win.sin_port;
575 memcpy( &uaddr->in.sin_addr, &win.sin_addr, sizeof(win.sin_addr) );
576 return sizeof(uaddr->in);
579 case WS_AF_INET6:
581 struct WS_sockaddr_in6 win = {0};
583 if (wsaddrlen < sizeof(win)) return 0;
584 memcpy( &win, wsaddr, sizeof(win) );
585 uaddr->in6.sin6_family = AF_INET6;
586 uaddr->in6.sin6_port = win.sin6_port;
587 uaddr->in6.sin6_flowinfo = win.sin6_flowinfo;
588 memcpy( &uaddr->in6.sin6_addr, &win.sin6_addr, sizeof(win.sin6_addr) );
589 #ifdef HAVE_STRUCT_SOCKADDR_IN6_SIN6_SCOPE_ID
590 uaddr->in6.sin6_scope_id = win.sin6_scope_id;
591 #endif
592 return sizeof(uaddr->in6);
595 #ifdef HAS_IPX
596 case WS_AF_IPX:
598 struct WS_sockaddr_ipx win = {0};
600 if (wsaddrlen < sizeof(win)) return 0;
601 memcpy( &win, wsaddr, sizeof(win) );
602 uaddr->ipx.sipx_family = AF_IPX;
603 memcpy( &uaddr->ipx.sipx_network, win.sa_netnum, sizeof(win.sa_netnum) );
604 memcpy( &uaddr->ipx.sipx_node, win.sa_nodenum, sizeof(win.sa_nodenum) );
605 uaddr->ipx.sipx_port = win.sa_socket;
606 return sizeof(uaddr->ipx);
608 #endif
610 #ifdef HAS_IRDA
611 case WS_AF_IRDA:
613 SOCKADDR_IRDA win = {0};
614 unsigned int lsap_sel;
616 if (wsaddrlen < sizeof(win)) return 0;
617 memcpy( &win, wsaddr, sizeof(win) );
618 uaddr->irda.sir_family = AF_IRDA;
619 if (sscanf( win.irdaServiceName, "LSAP-SEL%u", &lsap_sel ) == 1)
620 uaddr->irda.sir_lsap_sel = lsap_sel;
621 else
623 uaddr->irda.sir_lsap_sel = LSAP_ANY;
624 memcpy( uaddr->irda.sir_name, win.irdaServiceName, sizeof(win.irdaServiceName) );
626 memcpy( &uaddr->irda.sir_addr, win.irdaDeviceID, sizeof(win.irdaDeviceID) );
627 return sizeof(uaddr->irda);
629 #endif
631 case WS_AF_UNSPEC:
632 switch (wsaddrlen)
634 default: /* likely an ipv4 address */
635 case sizeof(struct WS_sockaddr_in):
636 return sizeof(uaddr->in);
638 #ifdef HAS_IPX
639 case sizeof(struct WS_sockaddr_ipx):
640 return sizeof(uaddr->ipx);
641 #endif
643 #ifdef HAS_IRDA
644 case sizeof(SOCKADDR_IRDA):
645 return sizeof(uaddr->irda);
646 #endif
648 case sizeof(struct WS_sockaddr_in6):
649 return sizeof(uaddr->in6);
652 default:
653 return 0;
657 static socklen_t get_unix_sockaddr_any( union unix_sockaddr *uaddr, int ws_family )
659 memset( uaddr, 0, sizeof(*uaddr) );
660 switch (ws_family)
662 case WS_AF_INET:
663 uaddr->in.sin_family = AF_INET;
664 return sizeof(uaddr->in);
665 case WS_AF_INET6:
666 uaddr->in6.sin6_family = AF_INET6;
667 return sizeof(uaddr->in6);
668 #ifdef HAS_IPX
669 case WS_AF_IPX:
670 uaddr->ipx.sipx_family = AF_IPX;
671 return sizeof(uaddr->ipx);
672 #endif
673 #ifdef HAS_IRDA
674 case WS_AF_IRDA:
675 uaddr->irda.sir_family = AF_IRDA;
676 return sizeof(uaddr->irda);
677 #endif
678 default:
679 return 0;
683 /* some events are generated at the same time but must be sent in a particular
684 * order (e.g. CONNECT must be sent before READ) */
685 static const enum afd_poll_bit event_bitorder[] =
687 AFD_POLL_BIT_CONNECT,
688 AFD_POLL_BIT_CONNECT_ERR,
689 AFD_POLL_BIT_ACCEPT,
690 AFD_POLL_BIT_OOB,
691 AFD_POLL_BIT_READ,
692 AFD_POLL_BIT_WRITE,
693 AFD_POLL_BIT_RESET,
694 AFD_POLL_BIT_HUP,
695 AFD_POLL_BIT_CLOSE,
/* What the peer of a socket observes after shutdown() of the write side. */
typedef enum {
    SOCK_SHUTDOWN_ERROR = -1,   /* the probe failed */
    SOCK_SHUTDOWN_EOF = 0,      /* readable, and read() returns 0 */
    SOCK_SHUTDOWN_POLLHUP = 1   /* poll() reports POLLHUP */
} sock_shutdown_t;

/* result of the probe; filled in by sock_init() */
static sock_shutdown_t sock_shutdown_type = SOCK_SHUTDOWN_ERROR;

/* Probe how this host signals a shutdown to the other end of a connection.
 *
 * One end of a local stream socket pair is shut down and the other end is
 * polled for input.  Returns SOCK_SHUTDOWN_POLLHUP if POLLHUP is reported,
 * SOCK_SHUTDOWN_EOF if the socket is readable and reading yields end of
 * file, and SOCK_SHUTDOWN_ERROR if any step fails or neither is seen. */
static sock_shutdown_t sock_check_pollhup(void)
{
    sock_shutdown_t result = SOCK_SHUTDOWN_ERROR;
    struct pollfd probe;
    int pair[2];
    char byte;

    if (socketpair( AF_UNIX, SOCK_STREAM, 0, pair )) return result;

    /* NOTE(review): the literal 1 is presumably SHUT_WR - confirm */
    if (!shutdown( pair[0], 1 ))
    {
        probe.fd = pair[1];
        probe.events = POLLIN;
        probe.revents = 0;

        /* Solaris' poll() sometimes returns nothing if given a 0ms timeout here */
        if (poll( &probe, 1, 1 ) == 1)
        {
            if (probe.revents & POLLHUP)
                result = SOCK_SHUTDOWN_POLLHUP;
            else if ((probe.revents & POLLIN) && read( pair[1], &byte, 1 ) == 0)
                result = SOCK_SHUTDOWN_EOF;
        }
        /* anything else is an error or a timeout */
    }

    close( pair[0] );
    close( pair[1] );
    return result;
}
735 void sock_init(void)
737 sock_shutdown_type = sock_check_pollhup();
739 switch ( sock_shutdown_type )
741 case SOCK_SHUTDOWN_EOF:
742 if (debug_level) fprintf( stderr, "sock_init: shutdown() causes EOF\n" );
743 break;
744 case SOCK_SHUTDOWN_POLLHUP:
745 if (debug_level) fprintf( stderr, "sock_init: shutdown() causes POLLHUP\n" );
746 break;
747 default:
748 fprintf( stderr, "sock_init: ERROR in sock_check_pollhup()\n" );
749 sock_shutdown_type = SOCK_SHUTDOWN_EOF;
753 static void sock_reselect( struct sock *sock )
755 int ev = sock_get_poll_events( sock->fd );
757 if (debug_level)
758 fprintf(stderr,"sock_reselect(%p): new mask %x\n", sock, ev);
760 set_fd_events( sock->fd, ev );
763 static unsigned int afd_poll_flag_to_win32( unsigned int flags )
765 static const unsigned int map[] =
767 FD_READ, /* READ */
768 FD_OOB, /* OOB */
769 FD_WRITE, /* WRITE */
770 FD_CLOSE, /* HUP */
771 FD_CLOSE, /* RESET */
772 0, /* CLOSE */
773 FD_CONNECT, /* CONNECT */
774 FD_ACCEPT, /* ACCEPT */
775 FD_CONNECT, /* CONNECT_ERR */
778 unsigned int i, ret = 0;
780 for (i = 0; i < ARRAY_SIZE(map); ++i)
782 if (flags & (1 << i)) ret |= map[i];
785 return ret;
788 /* wake anybody waiting on the socket event or send the associated message */
789 static void sock_wake_up( struct sock *sock )
791 unsigned int events = sock->pending_events & sock->mask;
792 int i;
794 if (sock->event)
796 if (debug_level) fprintf(stderr, "signalling events %x ptr %p\n", events, sock->event );
797 if (events)
798 set_event( sock->event );
800 if (sock->window)
802 if (debug_level) fprintf(stderr, "signalling events %x win %08x\n", events, sock->window );
803 for (i = 0; i < ARRAY_SIZE(event_bitorder); i++)
805 enum afd_poll_bit event = event_bitorder[i];
806 if (events & (1 << event))
808 lparam_t lparam = afd_poll_flag_to_win32(1 << event) | (sock_get_error( sock->errors[event] ) << 16);
809 post_message( sock->window, sock->message, sock->wparam, lparam );
812 sock->pending_events = 0;
813 sock_reselect( sock );
817 static inline int sock_error( struct sock *sock )
819 int error = 0;
820 socklen_t len = sizeof(error);
822 getsockopt( get_unix_fd(sock->fd), SOL_SOCKET, SO_ERROR, (void *)&error, &len);
824 switch (sock->state)
826 case SOCK_UNCONNECTED:
827 break;
829 case SOCK_CONNECTING:
830 if (error)
831 sock->errors[AFD_POLL_BIT_CONNECT_ERR] = error;
832 else
833 error = sock->errors[AFD_POLL_BIT_CONNECT_ERR];
834 break;
836 case SOCK_LISTENING:
837 if (error)
838 sock->errors[AFD_POLL_BIT_ACCEPT] = error;
839 else
840 error = sock->errors[AFD_POLL_BIT_ACCEPT];
841 break;
843 case SOCK_CONNECTED:
844 case SOCK_CONNECTIONLESS:
845 if (error == ECONNRESET || error == EPIPE)
847 sock->reset = 1;
848 error = 0;
850 else if (error)
851 sock->errors[AFD_POLL_BIT_HUP] = error;
852 else
853 error = sock->errors[AFD_POLL_BIT_HUP];
854 break;
857 return error;
860 static void free_accept_req( void *private )
862 struct accept_req *req = private;
863 list_remove( &req->entry );
864 if (req->acceptsock)
866 req->acceptsock->accept_recv_req = NULL;
867 release_object( req->acceptsock );
869 release_object( req->async );
870 release_object( req->iosb );
871 release_object( req->sock );
872 free( req );
875 static void fill_accept_output( struct accept_req *req )
877 const data_size_t out_size = req->iosb->out_size;
878 struct async *async = req->async;
879 union unix_sockaddr unix_addr;
880 struct WS_sockaddr *win_addr;
881 unsigned int remote_len;
882 socklen_t unix_len;
883 int fd, size = 0;
884 char *out_data;
885 int win_len;
887 if (!(out_data = mem_alloc( out_size )))
889 async_terminate( async, get_error() );
890 return;
893 fd = get_unix_fd( req->acceptsock->fd );
895 if (req->recv_len && (size = recv( fd, out_data, req->recv_len, 0 )) < 0)
897 if (!req->accepted && errno == EWOULDBLOCK)
899 req->accepted = 1;
900 sock_reselect( req->acceptsock );
901 return;
904 async_terminate( async, sock_get_ntstatus( errno ) );
905 free( out_data );
906 return;
909 if (req->local_len)
911 if (req->local_len < sizeof(int))
913 async_terminate( async, STATUS_BUFFER_TOO_SMALL );
914 free( out_data );
915 return;
918 unix_len = sizeof(unix_addr);
919 win_addr = (struct WS_sockaddr *)(out_data + req->recv_len + sizeof(int));
920 if (getsockname( fd, &unix_addr.addr, &unix_len ) < 0 ||
921 (win_len = sockaddr_from_unix( &unix_addr, win_addr, req->local_len - sizeof(int) )) < 0)
923 async_terminate( async, sock_get_ntstatus( errno ) );
924 free( out_data );
925 return;
927 memcpy( out_data + req->recv_len, &win_len, sizeof(int) );
930 unix_len = sizeof(unix_addr);
931 win_addr = (struct WS_sockaddr *)(out_data + req->recv_len + req->local_len + sizeof(int));
932 remote_len = out_size - req->recv_len - req->local_len;
933 if (getpeername( fd, &unix_addr.addr, &unix_len ) < 0 ||
934 (win_len = sockaddr_from_unix( &unix_addr, win_addr, remote_len - sizeof(int) )) < 0)
936 async_terminate( async, sock_get_ntstatus( errno ) );
937 free( out_data );
938 return;
940 memcpy( out_data + req->recv_len + req->local_len, &win_len, sizeof(int) );
942 async_request_complete( req->async, STATUS_SUCCESS, size, out_size, out_data );
945 static void complete_async_accept( struct sock *sock, struct accept_req *req )
947 struct sock *acceptsock = req->acceptsock;
948 struct async *async = req->async;
950 if (debug_level) fprintf( stderr, "completing accept request for socket %p\n", sock );
952 if (acceptsock)
954 if (!accept_into_socket( sock, acceptsock ))
956 async_terminate( async, get_error() );
957 return;
959 fill_accept_output( req );
961 else
963 obj_handle_t handle;
965 if (!(acceptsock = accept_socket( sock )))
967 async_terminate( async, get_error() );
968 return;
970 handle = alloc_handle_no_access_check( async_get_thread( async )->process, &acceptsock->obj,
971 GENERIC_READ | GENERIC_WRITE | SYNCHRONIZE, OBJ_INHERIT );
972 acceptsock->wparam = handle;
973 sock_reselect( acceptsock );
974 release_object( acceptsock );
975 if (!handle)
977 async_terminate( async, get_error() );
978 return;
981 async_request_complete_alloc( req->async, STATUS_SUCCESS, 0, sizeof(handle), &handle );
985 static void complete_async_accept_recv( struct accept_req *req )
987 if (debug_level) fprintf( stderr, "completing accept recv request for socket %p\n", req->acceptsock );
989 assert( req->recv_len );
991 fill_accept_output( req );
994 static void free_connect_req( void *private )
996 struct connect_req *req = private;
998 req->sock->connect_req = NULL;
999 release_object( req->async );
1000 release_object( req->iosb );
1001 release_object( req->sock );
1002 free( req );
1005 static void complete_async_connect( struct sock *sock )
1007 struct connect_req *req = sock->connect_req;
1008 const char *in_buffer;
1009 size_t len;
1010 int ret;
1012 if (debug_level) fprintf( stderr, "completing connect request for socket %p\n", sock );
1014 if (!req->send_len)
1016 async_terminate( req->async, STATUS_SUCCESS );
1017 return;
1020 in_buffer = (const char *)req->iosb->in_data + sizeof(struct afd_connect_params) + req->addr_len;
1021 len = req->send_len - req->send_cursor;
1023 ret = send( get_unix_fd( sock->fd ), in_buffer + req->send_cursor, len, 0 );
1024 if (ret < 0 && errno != EWOULDBLOCK)
1025 async_terminate( req->async, sock_get_ntstatus( errno ) );
1026 else if (ret == len)
1027 async_request_complete( req->async, STATUS_SUCCESS, req->send_len, 0, NULL );
1028 else
1029 req->send_cursor += ret;
1032 static void free_poll_req( void *private )
1034 struct poll_req *req = private;
1035 unsigned int i;
1037 if (req->timeout) remove_timeout_user( req->timeout );
1039 for (i = 0; i < req->count; ++i)
1040 release_object( req->sockets[i].sock );
1041 release_object( req->async );
1042 release_object( req->iosb );
1043 list_remove( &req->entry );
1044 free( req );
1047 static int is_oobinline( struct sock *sock )
1049 int oobinline;
1050 socklen_t len = sizeof(oobinline);
1051 return !getsockopt( get_unix_fd( sock->fd ), SOL_SOCKET, SO_OOBINLINE, (char *)&oobinline, &len ) && oobinline;
1054 static int get_poll_flags( struct sock *sock, int event )
1056 int flags = 0;
1058 /* A connection-mode socket which has never been connected does not return
1059 * write or hangup events, but Linux reports POLLOUT | POLLHUP. */
1060 if (sock->state == SOCK_UNCONNECTED)
1061 event &= ~(POLLOUT | POLLHUP);
1063 if (event & POLLIN)
1065 if (sock->state == SOCK_LISTENING)
1066 flags |= AFD_POLL_ACCEPT;
1067 else
1068 flags |= AFD_POLL_READ;
1070 if (event & POLLPRI)
1071 flags |= is_oobinline( sock ) ? AFD_POLL_READ : AFD_POLL_OOB;
1072 if (event & POLLOUT)
1073 flags |= AFD_POLL_WRITE;
1074 if (sock->state == SOCK_CONNECTED)
1075 flags |= AFD_POLL_CONNECT;
1076 if (event & POLLHUP)
1077 flags |= AFD_POLL_HUP;
1078 if (event & POLLERR)
1079 flags |= AFD_POLL_CONNECT_ERR;
1080 if (sock->reset)
1081 flags |= AFD_POLL_RESET;
1083 return flags;
1086 static void complete_async_poll( struct poll_req *req, unsigned int status )
1088 unsigned int i, signaled_count = 0;
1090 for (i = 0; i < req->count; ++i)
1092 struct sock *sock = req->sockets[i].sock;
1094 if (sock->main_poll == req)
1095 sock->main_poll = NULL;
1098 if (!status)
1100 for (i = 0; i < req->count; ++i)
1102 if (req->sockets[i].flags)
1103 ++signaled_count;
1107 if (is_machine_64bit( async_get_thread( req->async )->process->machine ))
1109 size_t output_size = offsetof( struct afd_poll_params_64, sockets[signaled_count] );
1110 struct afd_poll_params_64 *output;
1112 if (!(output = mem_alloc( output_size )))
1114 async_terminate( req->async, get_error() );
1115 return;
1117 memset( output, 0, output_size );
1118 output->timeout = req->orig_timeout;
1119 output->exclusive = req->exclusive;
1120 for (i = 0; i < req->count; ++i)
1122 if (!req->sockets[i].flags) continue;
1123 output->sockets[output->count].socket = req->sockets[i].handle;
1124 output->sockets[output->count].flags = req->sockets[i].flags;
1125 output->sockets[output->count].status = req->sockets[i].status;
1126 ++output->count;
1128 assert( output->count == signaled_count );
1130 async_request_complete( req->async, status, output_size, output_size, output );
1132 else
1134 size_t output_size = offsetof( struct afd_poll_params_32, sockets[signaled_count] );
1135 struct afd_poll_params_32 *output;
1137 if (!(output = mem_alloc( output_size )))
1139 async_terminate( req->async, get_error() );
1140 return;
1142 memset( output, 0, output_size );
1143 output->timeout = req->orig_timeout;
1144 output->exclusive = req->exclusive;
1145 for (i = 0; i < req->count; ++i)
1147 if (!req->sockets[i].flags) continue;
1148 output->sockets[output->count].socket = req->sockets[i].handle;
1149 output->sockets[output->count].flags = req->sockets[i].flags;
1150 output->sockets[output->count].status = req->sockets[i].status;
1151 ++output->count;
1153 assert( output->count == signaled_count );
1155 async_request_complete( req->async, status, output_size, output_size, output );
1159 static void complete_async_polls( struct sock *sock, int event, int error )
1161 int flags = get_poll_flags( sock, event );
1162 struct poll_req *req, *next;
1164 LIST_FOR_EACH_ENTRY_SAFE( req, next, &poll_list, struct poll_req, entry )
1166 unsigned int i;
1168 if (req->iosb->status != STATUS_PENDING) continue;
1170 for (i = 0; i < req->count; ++i)
1172 if (req->sockets[i].sock != sock) continue;
1173 if (!(req->sockets[i].mask & flags)) continue;
1175 if (debug_level)
1176 fprintf( stderr, "completing poll for socket %p, wanted %#x got %#x\n",
1177 sock, req->sockets[i].mask, flags );
1179 req->sockets[i].flags = req->sockets[i].mask & flags;
1180 req->sockets[i].status = sock_get_ntstatus( error );
1182 if (req->pending)
1184 complete_async_poll( req, STATUS_SUCCESS );
1185 break;
1191 static void async_poll_timeout( void *private )
1193 struct poll_req *req = private;
1195 req->timeout = NULL;
1197 if (req->iosb->status != STATUS_PENDING) return;
1199 complete_async_poll( req, STATUS_TIMEOUT );
1202 static int sock_dispatch_asyncs( struct sock *sock, int event, int error )
1204 if (event & (POLLIN | POLLPRI))
1206 struct accept_req *req;
1208 LIST_FOR_EACH_ENTRY( req, &sock->accept_list, struct accept_req, entry )
1210 if (req->iosb->status == STATUS_PENDING && !req->accepted)
1212 complete_async_accept( sock, req );
1213 event &= ~POLLIN;
1214 break;
1218 if (sock->accept_recv_req && sock->accept_recv_req->iosb->status == STATUS_PENDING)
1219 complete_async_accept_recv( sock->accept_recv_req );
1222 if ((event & POLLOUT) && sock->connect_req && sock->connect_req->iosb->status == STATUS_PENDING)
1223 complete_async_connect( sock );
1225 if ((event & (POLLIN | POLLPRI)) && async_queued( &sock->read_q ))
1227 if (async_waiting( &sock->read_q ))
1229 if (debug_level) fprintf( stderr, "activating read queue for socket %p\n", sock );
1230 async_wake_up( &sock->read_q, STATUS_ALERTED );
1232 event &= ~(POLLIN | POLLPRI);
1235 if ((event & POLLOUT) && async_queued( &sock->write_q ))
1237 if (async_waiting( &sock->write_q ))
1239 if (debug_level) fprintf( stderr, "activating write queue for socket %p\n", sock );
1240 async_wake_up( &sock->write_q, STATUS_ALERTED );
1242 event &= ~POLLOUT;
1245 if (event & (POLLERR | POLLHUP))
1247 int status = sock_get_ntstatus( error );
1248 struct accept_req *req, *next;
1250 async_wake_up( &sock->read_q, status );
1251 async_wake_up( &sock->write_q, status );
1253 LIST_FOR_EACH_ENTRY_SAFE( req, next, &sock->accept_list, struct accept_req, entry )
1255 if (req->iosb->status == STATUS_PENDING)
1256 async_terminate( req->async, status );
1259 if (sock->accept_recv_req && sock->accept_recv_req->iosb->status == STATUS_PENDING)
1260 async_terminate( sock->accept_recv_req->async, status );
1262 if (sock->connect_req)
1263 async_terminate( sock->connect_req->async, status );
1266 if (sock->reset)
1268 async_wake_up( &sock->read_q, STATUS_CONNECTION_RESET );
1269 async_wake_up( &sock->write_q, STATUS_CONNECTION_RESET );
1271 if (sock->accept_recv_req && sock->accept_recv_req->iosb->status == STATUS_PENDING)
1272 async_terminate( sock->accept_recv_req->async, STATUS_CONNECTION_RESET );
1275 return event;
1278 static void post_socket_event( struct sock *sock, enum afd_poll_bit event_bit )
1280 unsigned int event = (1 << event_bit);
1282 if (!(sock->reported_events & event))
1284 sock->pending_events |= event;
1285 sock->reported_events |= event;
1289 static void sock_dispatch_events( struct sock *sock, enum connection_state prevstate, int event )
1291 switch (prevstate)
1293 case SOCK_UNCONNECTED:
1294 break;
1296 case SOCK_CONNECTING:
1297 if (event & POLLOUT)
1299 post_socket_event( sock, AFD_POLL_BIT_CONNECT );
1300 post_socket_event( sock, AFD_POLL_BIT_WRITE );
1302 if (event & (POLLERR | POLLHUP))
1303 post_socket_event( sock, AFD_POLL_BIT_CONNECT_ERR );
1304 break;
1306 case SOCK_LISTENING:
1307 if (event & (POLLIN | POLLERR | POLLHUP))
1308 post_socket_event( sock, AFD_POLL_BIT_ACCEPT );
1309 break;
1311 case SOCK_CONNECTED:
1312 case SOCK_CONNECTIONLESS:
1313 if (sock->reset)
1314 post_socket_event( sock, AFD_POLL_BIT_RESET );
1316 if (event & POLLIN)
1317 post_socket_event( sock, AFD_POLL_BIT_READ );
1319 if (event & POLLOUT)
1320 post_socket_event( sock, AFD_POLL_BIT_WRITE );
1322 if (event & POLLPRI)
1323 post_socket_event( sock, AFD_POLL_BIT_OOB );
1325 if (event & (POLLERR | POLLHUP))
1326 post_socket_event( sock, AFD_POLL_BIT_HUP );
1327 break;
1330 sock_wake_up( sock );
1333 static void sock_poll_event( struct fd *fd, int event )
1335 struct sock *sock = get_fd_user( fd );
1336 int hangup_seen = 0;
1337 enum connection_state prevstate = sock->state;
1338 int error = 0;
1340 assert( sock->obj.ops == &sock_ops );
1341 if (debug_level)
1342 fprintf(stderr, "socket %p select event: %x\n", sock, event);
1344 if (event & (POLLERR | POLLHUP))
1345 error = sock_error( sock );
1347 switch (sock->state)
1349 case SOCK_UNCONNECTED:
1350 break;
1352 case SOCK_CONNECTING:
1353 if (event & (POLLERR|POLLHUP))
1355 sock->state = SOCK_UNCONNECTED;
1356 event &= ~POLLOUT;
1358 else if (event & POLLOUT)
1360 sock->state = SOCK_CONNECTED;
1361 sock->connect_time = current_time;
1362 sock->errors[AFD_POLL_BIT_CONNECT_ERR] = 0;
1364 break;
1366 case SOCK_LISTENING:
1367 break;
1369 case SOCK_CONNECTED:
1370 case SOCK_CONNECTIONLESS:
1371 if (sock->reset)
1372 event &= ~(POLLIN | POLLERR | POLLHUP);
1374 if (sock->type == WS_SOCK_STREAM && (event & POLLIN))
1376 char dummy;
1377 int nr;
1379 /* Linux 2.4 doesn't report POLLHUP if only one side of the socket
1380 * has been closed, so we need to check for it explicitly here */
1381 nr = recv( get_unix_fd( fd ), &dummy, 1, MSG_PEEK );
1382 if ( nr == 0 )
1384 hangup_seen = 1;
1385 event &= ~POLLIN;
1387 else if ( nr < 0 )
1389 event &= ~POLLIN;
1390 /* EAGAIN can happen if an async recv() falls between the server's poll()
1391 call and the invocation of this routine */
1392 if (errno == ECONNRESET || errno == EPIPE)
1394 sock->reset = 1;
1396 else if (errno != EAGAIN)
1398 error = errno;
1399 event |= POLLERR;
1400 sock->errors[AFD_POLL_BIT_HUP] = error;
1401 if ( debug_level )
1402 fprintf( stderr, "recv error on socket %p: %d\n", sock, errno );
1407 if (hangup_seen || (sock_shutdown_type == SOCK_SHUTDOWN_POLLHUP && (event & POLLHUP)))
1409 sock->hangup = 1;
1411 else if (event & (POLLHUP | POLLERR))
1413 sock->aborted = 1;
1415 if (debug_level)
1416 fprintf( stderr, "socket %p aborted by error %d, event %#x\n", sock, error, event );
1419 if (hangup_seen)
1420 event |= POLLHUP;
1421 break;
1424 event = sock_dispatch_asyncs( sock, event, error );
1425 sock_dispatch_events( sock, prevstate, event );
1426 complete_async_polls( sock, event, error );
1428 sock_reselect( sock );
1431 static void sock_dump( struct object *obj, int verbose )
1433 struct sock *sock = (struct sock *)obj;
1434 assert( obj->ops == &sock_ops );
1435 fprintf( stderr, "Socket fd=%p, state=%x, mask=%x, pending=%x, reported=%x\n",
1436 sock->fd, sock->state,
1437 sock->mask, sock->pending_events, sock->reported_events );
1440 static int poll_flags_from_afd( struct sock *sock, int flags )
1442 int ev = 0;
1444 /* A connection-mode socket which has never been connected does
1445 * not return write or hangup events, but Linux returns
1446 * POLLOUT | POLLHUP. */
1447 if (sock->state == SOCK_UNCONNECTED)
1448 return -1;
1450 if (flags & (AFD_POLL_READ | AFD_POLL_ACCEPT))
1451 ev |= POLLIN;
1452 if ((flags & AFD_POLL_HUP) && sock->type == WS_SOCK_STREAM)
1453 ev |= POLLIN;
1454 if (flags & AFD_POLL_OOB)
1455 ev |= is_oobinline( sock ) ? POLLIN : POLLPRI;
1456 if (flags & AFD_POLL_WRITE)
1457 ev |= POLLOUT;
1459 return ev;
1462 static int sock_get_poll_events( struct fd *fd )
1464 struct sock *sock = get_fd_user( fd );
1465 unsigned int mask = sock->mask & ~sock->reported_events;
1466 struct poll_req *req;
1467 int ev = 0;
1469 assert( sock->obj.ops == &sock_ops );
1471 if (!sock->type) /* not initialized yet */
1472 return -1;
1474 LIST_FOR_EACH_ENTRY( req, &poll_list, struct poll_req, entry )
1476 unsigned int i;
1478 if (req->iosb->status != STATUS_PENDING) continue;
1480 for (i = 0; i < req->count; ++i)
1482 if (req->sockets[i].sock != sock) continue;
1484 ev |= poll_flags_from_afd( sock, req->sockets[i].mask );
1488 switch (sock->state)
1490 case SOCK_UNCONNECTED:
1491 /* A connection-mode Windows socket which has never been connected does
1492 * not return any events, but Linux returns POLLOUT | POLLHUP. Hence we
1493 * need to return -1 here, to prevent the socket from being polled on at
1494 * all. */
1495 return -1;
1497 case SOCK_CONNECTING:
1498 return POLLOUT;
1500 case SOCK_LISTENING:
1501 if (!list_empty( &sock->accept_list ) || (mask & AFD_POLL_ACCEPT))
1502 ev |= POLLIN;
1503 break;
1505 case SOCK_CONNECTED:
1506 case SOCK_CONNECTIONLESS:
1507 if (sock->hangup && sock->wr_shutdown && !sock->wr_shutdown_pending)
1509 /* Linux returns POLLHUP if a socket is both SHUT_RD and SHUT_WR, or
1510 * if both the socket and its peer are SHUT_WR.
1512 * We don't use SHUT_RD, so we can only encounter this in the latter
1513 * case. In that case there can't be any pending read requests (they
1514 * would have already been completed with a length of zero), the
1515 * above condition ensures that we don't have any pending write
1516 * requests, and nothing that can change about the socket state that
1517 * would complete a pending poll request. */
1518 return -1;
1521 if (sock->aborted || sock->reset)
1522 return -1;
1524 if (sock->accept_recv_req)
1526 ev |= POLLIN;
1528 else if (async_queued( &sock->read_q ))
1530 /* Clear POLLIN and POLLPRI if we have an alerted async, even if
1531 * we're polling this socket for READ or OOB. We can't signal the
1532 * poll if the pending async will read all of the data [cf. the
1533 * matching logic in sock_dispatch_asyncs()], but we also don't
1534 * want to spin polling for POLLIN if we're not going to use it. */
1535 if (async_waiting( &sock->read_q ))
1536 ev |= POLLIN | POLLPRI;
1537 else
1538 ev &= ~(POLLIN | POLLPRI);
1540 else
1542 /* Don't ask for POLLIN if we got a hangup. We won't receive more
1543 * data anyway, but we will get POLLIN if SOCK_SHUTDOWN_EOF. */
1544 if (!sock->hangup)
1546 if (mask & AFD_POLL_READ)
1547 ev |= POLLIN;
1548 if (mask & AFD_POLL_OOB)
1549 ev |= POLLPRI;
1552 /* We use POLLIN with 0 bytes recv() as hangup indication for stream sockets. */
1553 if (sock->state == SOCK_CONNECTED && (mask & AFD_POLL_HUP) && !(sock->reported_events & AFD_POLL_READ))
1554 ev |= POLLIN;
1557 if (async_queued( &sock->write_q ))
1559 /* As with read asyncs above, clear POLLOUT if we have an alerted
1560 * async. */
1561 if (async_waiting( &sock->write_q ))
1562 ev |= POLLOUT;
1563 else
1564 ev &= ~POLLOUT;
1566 else if (!sock->wr_shutdown && (mask & AFD_POLL_WRITE))
1568 ev |= POLLOUT;
1571 break;
1574 return ev;
1577 static enum server_fd_type sock_get_fd_type( struct fd *fd )
1579 return FD_TYPE_SOCKET;
1582 static void sock_cancel_async( struct fd *fd, struct async *async )
1584 struct poll_req *req;
1586 LIST_FOR_EACH_ENTRY( req, &poll_list, struct poll_req, entry )
1588 unsigned int i;
1590 if (req->async != async)
1591 continue;
1593 for (i = 0; i < req->count; i++)
1595 struct sock *sock = req->sockets[i].sock;
1597 if (sock->main_poll == req)
1598 sock->main_poll = NULL;
1602 async_terminate( async, STATUS_CANCELLED );
1605 static void sock_reselect_async( struct fd *fd, struct async_queue *queue )
1607 struct sock *sock = get_fd_user( fd );
1609 if (sock->wr_shutdown_pending && list_empty( &sock->write_q.queue ))
1611 shutdown( get_unix_fd( sock->fd ), SHUT_WR );
1612 sock->wr_shutdown_pending = 0;
1615 /* Don't reselect the ifchange queue; we always ask for POLLIN.
1616 * Don't reselect an uninitialized socket; we can't call set_fd_events() on
1617 * a pseudo-fd. */
1618 if (queue != &sock->ifchange_q && sock->type)
1619 sock_reselect( sock );
1622 static struct fd *sock_get_fd( struct object *obj )
1624 struct sock *sock = (struct sock *)obj;
1625 return (struct fd *)grab_object( sock->fd );
1628 static int sock_close_handle( struct object *obj, struct process *process, obj_handle_t handle )
1630 struct sock *sock = (struct sock *)obj;
1632 if (sock->obj.handle_count == 1) /* last handle */
1634 struct accept_req *accept_req, *accept_next;
1635 struct poll_req *poll_req, *poll_next;
1637 if (sock->accept_recv_req)
1638 async_terminate( sock->accept_recv_req->async, STATUS_CANCELLED );
1640 LIST_FOR_EACH_ENTRY_SAFE( accept_req, accept_next, &sock->accept_list, struct accept_req, entry )
1641 async_terminate( accept_req->async, STATUS_CANCELLED );
1643 if (sock->connect_req)
1644 async_terminate( sock->connect_req->async, STATUS_CANCELLED );
1646 LIST_FOR_EACH_ENTRY_SAFE( poll_req, poll_next, &poll_list, struct poll_req, entry )
1648 struct iosb *iosb = poll_req->iosb;
1649 BOOL signaled = FALSE;
1650 unsigned int i;
1652 if (iosb->status != STATUS_PENDING) continue;
1654 for (i = 0; i < poll_req->count; ++i)
1656 if (poll_req->sockets[i].sock == sock)
1658 signaled = TRUE;
1659 poll_req->sockets[i].flags = AFD_POLL_CLOSE;
1660 poll_req->sockets[i].status = 0;
1664 if (signaled) complete_async_poll( poll_req, STATUS_SUCCESS );
1667 return async_close_obj_handle( obj, process, handle );
1670 static void sock_destroy( struct object *obj )
1672 struct sock *sock = (struct sock *)obj;
1673 unsigned int i;
1675 assert( obj->ops == &sock_ops );
1677 /* FIXME: special socket shutdown stuff? */
1679 for (i = 0; i < 2; ++i)
1681 if (sock->bound_addr[i] && --sock->bound_addr[i]->reuse_count <= 0)
1683 rb_remove( &bound_addresses_tree, &sock->bound_addr[i]->entry );
1684 free( sock->bound_addr[i] );
1688 if ( sock->deferred )
1689 release_object( sock->deferred );
1691 async_wake_up( &sock->ifchange_q, STATUS_CANCELLED );
1692 sock_release_ifchange( sock );
1693 free_async_queue( &sock->read_q );
1694 free_async_queue( &sock->write_q );
1695 free_async_queue( &sock->ifchange_q );
1696 free_async_queue( &sock->accept_q );
1697 free_async_queue( &sock->connect_q );
1698 free_async_queue( &sock->poll_q );
1699 if (sock->event) release_object( sock->event );
1700 if (sock->fd) release_object( sock->fd );
1703 static struct sock *create_socket(void)
1705 struct sock *sock;
1707 if (!(sock = alloc_object( &sock_ops ))) return NULL;
1708 sock->fd = NULL;
1709 sock->state = SOCK_UNCONNECTED;
1710 sock->mask = 0;
1711 sock->pending_events = 0;
1712 sock->reported_events = 0;
1713 sock->proto = 0;
1714 sock->type = 0;
1715 sock->family = 0;
1716 sock->event = NULL;
1717 sock->window = 0;
1718 sock->message = 0;
1719 sock->wparam = 0;
1720 sock->connect_time = 0;
1721 sock->deferred = NULL;
1722 sock->ifchange_obj = NULL;
1723 sock->accept_recv_req = NULL;
1724 sock->connect_req = NULL;
1725 sock->main_poll = NULL;
1726 memset( &sock->addr, 0, sizeof(sock->addr) );
1727 sock->addr_len = 0;
1728 sock->rd_shutdown = 0;
1729 sock->wr_shutdown = 0;
1730 sock->wr_shutdown_pending = 0;
1731 sock->hangup = 0;
1732 sock->aborted = 0;
1733 sock->nonblocking = 0;
1734 sock->bound = 0;
1735 sock->reset = 0;
1736 sock->reuseaddr = 0;
1737 sock->exclusiveaddruse = 0;
1738 sock->rcvbuf = 0;
1739 sock->sndbuf = 0;
1740 sock->rcvtimeo = 0;
1741 sock->sndtimeo = 0;
1742 sock->icmp_fixup_data_len = 0;
1743 sock->bound_addr[0] = sock->bound_addr[1] = NULL;
1744 init_async_queue( &sock->read_q );
1745 init_async_queue( &sock->write_q );
1746 init_async_queue( &sock->ifchange_q );
1747 init_async_queue( &sock->accept_q );
1748 init_async_queue( &sock->connect_q );
1749 init_async_queue( &sock->poll_q );
1750 memset( sock->errors, 0, sizeof(sock->errors) );
1751 list_init( &sock->accept_list );
1752 return sock;
1755 static int get_unix_family( int family )
1757 switch (family)
1759 case WS_AF_INET: return AF_INET;
1760 case WS_AF_INET6: return AF_INET6;
1761 #ifdef HAS_IPX
1762 case WS_AF_IPX: return AF_IPX;
1763 #endif
1764 #ifdef AF_IRDA
1765 case WS_AF_IRDA: return AF_IRDA;
1766 #endif
1767 case WS_AF_UNSPEC: return AF_UNSPEC;
1768 default: return -1;
1772 static int get_unix_type( int type )
1774 switch (type)
1776 case WS_SOCK_DGRAM: return SOCK_DGRAM;
1777 case WS_SOCK_RAW: return SOCK_RAW;
1778 case WS_SOCK_STREAM: return SOCK_STREAM;
1779 default: return -1;
1783 static int get_unix_protocol( int protocol )
1785 if (protocol >= WS_NSPROTO_IPX && protocol <= WS_NSPROTO_IPX + 255)
1786 return protocol;
1788 switch (protocol)
1790 case WS_IPPROTO_ICMP: return IPPROTO_ICMP;
1791 case WS_IPPROTO_IGMP: return IPPROTO_IGMP;
1792 case WS_IPPROTO_IP: return IPPROTO_IP;
1793 case WS_IPPROTO_IPV4: return IPPROTO_IPIP;
1794 case WS_IPPROTO_IPV6: return IPPROTO_IPV6;
1795 case WS_IPPROTO_RAW: return IPPROTO_RAW;
1796 case WS_IPPROTO_TCP: return IPPROTO_TCP;
1797 case WS_IPPROTO_UDP: return IPPROTO_UDP;
1798 default: return -1;
/* Enable (value != 0) or disable (value == 0) the "don't fragment" behaviour
 * on a Unix socket. 'level' is IPPROTO_IP for IPv4; any other level is
 * treated as IPv6. Uses the DONTFRAG option when the platform defines it and
 * the MTU discovery option otherwise; does nothing when neither exists.
 * The setsockopt() result is ignored. */
static void set_dont_fragment( int fd, int level, int value )
{
    int option;

    if (level != IPPROTO_IP)
    {
#ifdef IPV6_DONTFRAG
        option = IPV6_DONTFRAG;
#elif defined(IPV6_MTU_DISCOVER) && defined(IPV6_PMTUDISC_DO) && defined(IPV6_PMTUDISC_DONT)
        option = IPV6_MTU_DISCOVER;
        value = value ? IPV6_PMTUDISC_DO : IPV6_PMTUDISC_DONT;
#else
        return;
#endif
    }
    else
    {
#ifdef IP_DONTFRAG
        option = IP_DONTFRAG;
#elif defined(IP_MTU_DISCOVER) && defined(IP_PMTUDISC_DO) && defined(IP_PMTUDISC_DONT)
        option = IP_MTU_DISCOVER;
        value = value ? IP_PMTUDISC_DO : IP_PMTUDISC_DONT;
#else
        return;
#endif
    }

    setsockopt( fd, level, option, &value, sizeof(value) );
}
1832 static int init_socket( struct sock *sock, int family, int type, int protocol )
1834 unsigned int options = 0;
1835 int sockfd, unix_type, unix_family, unix_protocol, value;
1836 socklen_t len;
1838 unix_family = get_unix_family( family );
1839 unix_type = get_unix_type( type );
1840 unix_protocol = get_unix_protocol( protocol );
1842 if (unix_protocol < 0)
1844 if (type && unix_type < 0)
1845 set_win32_error( WSAESOCKTNOSUPPORT );
1846 else
1847 set_win32_error( WSAEPROTONOSUPPORT );
1848 return -1;
1850 if (unix_family < 0)
1852 if (family >= 0 && unix_type < 0)
1853 set_win32_error( WSAESOCKTNOSUPPORT );
1854 else
1855 set_win32_error( WSAEAFNOSUPPORT );
1856 return -1;
1859 sockfd = socket( unix_family, unix_type, unix_protocol );
1861 #ifdef linux
1862 if (sockfd == -1 && errno == EPERM && unix_family == AF_INET
1863 && unix_type == SOCK_RAW && unix_protocol == IPPROTO_ICMP)
1865 sockfd = socket( unix_family, SOCK_DGRAM, unix_protocol );
1866 if (sockfd != -1)
1868 const int val = 1;
1870 setsockopt( sockfd, IPPROTO_IP, IP_RECVTTL, (const char *)&val, sizeof(val) );
1871 setsockopt( sockfd, IPPROTO_IP, IP_RECVTOS, (const char *)&val, sizeof(val) );
1872 setsockopt( sockfd, IPPROTO_IP, IP_PKTINFO, (const char *)&val, sizeof(val) );
1875 #endif
1877 if (sockfd == -1)
1879 if (errno == EINVAL) set_win32_error( WSAESOCKTNOSUPPORT );
1880 else set_win32_error( sock_get_error( errno ));
1881 return -1;
1883 fcntl(sockfd, F_SETFL, O_NONBLOCK); /* make socket nonblocking */
1885 if (family == WS_AF_IPX && protocol >= WS_NSPROTO_IPX && protocol <= WS_NSPROTO_IPX + 255)
1887 #ifdef HAS_IPX
1888 int ipx_type = protocol - WS_NSPROTO_IPX;
1890 #ifdef SOL_IPX
1891 setsockopt( sockfd, SOL_IPX, IPX_TYPE, &ipx_type, sizeof(ipx_type) );
1892 #else
1893 struct ipx val;
1894 /* Should we retrieve val using a getsockopt call and then
1895 * set the modified one? */
1896 val.ipx_pt = ipx_type;
1897 setsockopt( sockfd, 0, SO_DEFAULT_HEADERS, &val, sizeof(val) );
1898 #endif
1899 #endif
1902 if (unix_family == AF_INET || unix_family == AF_INET6)
1904 /* ensure IP_DONTFRAGMENT is disabled for SOCK_DGRAM and SOCK_RAW, enabled for SOCK_STREAM */
1905 if (unix_type == SOCK_DGRAM || unix_type == SOCK_RAW) /* in Linux the global default can be enabled */
1906 set_dont_fragment( sockfd, unix_family == AF_INET6 ? IPPROTO_IPV6 : IPPROTO_IP, FALSE );
1907 else if (unix_type == SOCK_STREAM)
1908 set_dont_fragment( sockfd, unix_family == AF_INET6 ? IPPROTO_IPV6 : IPPROTO_IP, TRUE );
1911 #ifdef IPV6_V6ONLY
1912 if (unix_family == AF_INET6)
1914 static const int enable = 1;
1915 setsockopt( sockfd, IPPROTO_IPV6, IPV6_V6ONLY, &enable, sizeof(enable) );
1917 #endif
1919 len = sizeof(value);
1920 if (!getsockopt( sockfd, SOL_SOCKET, SO_RCVBUF, &value, &len ))
1922 if (value < MIN_RCVBUF)
1924 value = MIN_RCVBUF;
1925 setsockopt( sockfd, SOL_SOCKET, SO_RCVBUF, &value, sizeof(value) );
1927 sock->rcvbuf = value;
1930 len = sizeof(value);
1931 if (!getsockopt( sockfd, SOL_SOCKET, SO_SNDBUF, &value, &len ))
1932 sock->sndbuf = value;
1934 sock->state = (type == WS_SOCK_STREAM ? SOCK_UNCONNECTED : SOCK_CONNECTIONLESS);
1935 sock->proto = protocol;
1936 sock->type = type;
1937 sock->family = family;
1939 if (is_tcp_socket( sock ))
1941 value = 1;
1942 setsockopt( sockfd, SOL_SOCKET, SO_REUSEADDR, &value, sizeof(value) );
1943 #ifdef TCP_SYNCNT
1944 value = 4;
1945 setsockopt( sockfd, IPPROTO_TCP, TCP_SYNCNT, &value, sizeof(value) );
1946 #endif
1949 if (sock->fd)
1951 options = get_fd_options( sock->fd );
1952 release_object( sock->fd );
1955 if (!(sock->fd = create_anonymous_fd( &sock_fd_ops, sockfd, &sock->obj, options )))
1957 return -1;
1960 /* We can't immediately allow caching for a connection-mode socket, since it
1961 * might be accepted into (changing the underlying fd object.) */
1962 if (sock->type != WS_SOCK_STREAM) allow_fd_caching( sock->fd );
1964 return 0;
1967 /* accepts a socket and inits it */
1968 static int accept_new_fd( struct sock *sock )
1971 /* Try to accept(2). We can't be safe that this an already connected socket
1972 * or that accept() is allowed on it. In those cases we will get -1/errno
1973 * return.
1975 struct sockaddr saddr;
1976 socklen_t slen = sizeof(saddr);
1977 int acceptfd = accept( get_unix_fd(sock->fd), &saddr, &slen );
1978 if (acceptfd != -1)
1979 fcntl( acceptfd, F_SETFL, O_NONBLOCK );
1980 else
1981 set_error( sock_get_ntstatus( errno ));
1982 return acceptfd;
1985 /* accept a socket (creates a new fd) */
1986 static struct sock *accept_socket( struct sock *sock )
1988 struct sock *acceptsock;
1989 int acceptfd;
1991 if (get_unix_fd( sock->fd ) == -1) return NULL;
1993 if ( sock->deferred )
1995 acceptsock = sock->deferred;
1996 sock->deferred = NULL;
1998 else
2000 union unix_sockaddr unix_addr;
2001 socklen_t unix_len;
2003 if ((acceptfd = accept_new_fd( sock )) == -1) return NULL;
2004 if (!(acceptsock = create_socket()))
2006 close( acceptfd );
2007 return NULL;
2010 /* newly created socket gets the same properties of the listening socket */
2011 acceptsock->state = SOCK_CONNECTED;
2012 acceptsock->bound = 1;
2013 acceptsock->nonblocking = sock->nonblocking;
2014 acceptsock->mask = sock->mask;
2015 acceptsock->proto = sock->proto;
2016 acceptsock->type = sock->type;
2017 acceptsock->family = sock->family;
2018 acceptsock->window = sock->window;
2019 acceptsock->message = sock->message;
2020 acceptsock->reuseaddr = sock->reuseaddr;
2021 acceptsock->exclusiveaddruse = sock->exclusiveaddruse;
2022 acceptsock->sndbuf = sock->sndbuf;
2023 acceptsock->rcvbuf = sock->rcvbuf;
2024 acceptsock->sndtimeo = sock->sndtimeo;
2025 acceptsock->rcvtimeo = sock->rcvtimeo;
2026 acceptsock->connect_time = current_time;
2028 if (sock->event) acceptsock->event = (struct event *)grab_object( sock->event );
2029 if (!(acceptsock->fd = create_anonymous_fd( &sock_fd_ops, acceptfd, &acceptsock->obj,
2030 get_fd_options( sock->fd ) )))
2032 release_object( acceptsock );
2033 return NULL;
2035 unix_len = sizeof(unix_addr);
2036 if (!getsockname( acceptfd, &unix_addr.addr, &unix_len ))
2037 acceptsock->addr_len = sockaddr_from_unix( &unix_addr, &acceptsock->addr.addr, sizeof(acceptsock->addr) );
2039 clear_error();
2040 sock->pending_events &= ~AFD_POLL_ACCEPT;
2041 sock->reported_events &= ~AFD_POLL_ACCEPT;
2042 sock_reselect( sock );
2043 return acceptsock;
2046 static int accept_into_socket( struct sock *sock, struct sock *acceptsock )
2048 union unix_sockaddr unix_addr;
2049 socklen_t unix_len;
2050 int acceptfd;
2051 struct fd *newfd;
2053 if (get_unix_fd( sock->fd ) == -1) return FALSE;
2055 if ( sock->deferred )
2057 newfd = dup_fd_object( sock->deferred->fd, 0, 0,
2058 get_fd_options( acceptsock->fd ) );
2059 if ( !newfd )
2060 return FALSE;
2062 set_fd_user( newfd, &sock_fd_ops, &acceptsock->obj );
2064 release_object( sock->deferred );
2065 sock->deferred = NULL;
2067 else
2069 if ((acceptfd = accept_new_fd( sock )) == -1)
2070 return FALSE;
2072 if (!(newfd = create_anonymous_fd( &sock_fd_ops, acceptfd, &acceptsock->obj,
2073 get_fd_options( acceptsock->fd ) )))
2074 return FALSE;
2077 acceptsock->state = SOCK_CONNECTED;
2078 acceptsock->bound = 1;
2079 acceptsock->pending_events = 0;
2080 acceptsock->reported_events = 0;
2081 acceptsock->proto = sock->proto;
2082 acceptsock->type = sock->type;
2083 acceptsock->family = sock->family;
2084 acceptsock->wparam = 0;
2085 acceptsock->deferred = NULL;
2086 acceptsock->connect_time = current_time;
2087 fd_copy_completion( acceptsock->fd, newfd );
2088 release_object( acceptsock->fd );
2089 acceptsock->fd = newfd;
2091 unix_len = sizeof(unix_addr);
2092 if (!getsockname( get_unix_fd( newfd ), &unix_addr.addr, &unix_len ))
2093 acceptsock->addr_len = sockaddr_from_unix( &unix_addr, &acceptsock->addr.addr, sizeof(acceptsock->addr) );
2095 clear_error();
2096 sock->pending_events &= ~AFD_POLL_ACCEPT;
2097 sock->reported_events &= ~AFD_POLL_ACCEPT;
2098 sock_reselect( sock );
2100 return TRUE;
2103 #ifdef IP_BOUND_IF
2105 static int bind_to_iface_name( int fd, in_addr_t bind_addr, const char *name )
2107 static const int enable = 1;
2108 unsigned int index;
2110 if (!(index = if_nametoindex( name )))
2111 return -1;
2113 if (setsockopt( fd, IPPROTO_IP, IP_BOUND_IF, &index, sizeof(index) ))
2114 return -1;
2116 return setsockopt( fd, SOL_SOCKET, SO_REUSEADDR, &enable, sizeof(enable) );
2119 #elif defined(IP_UNICAST_IF) && defined(SO_ATTACH_FILTER) && defined(SO_BINDTODEVICE)
2121 struct interface_filter
2123 struct sock_filter iface_memaddr;
2124 struct sock_filter iface_rule;
2125 struct sock_filter ip_memaddr;
2126 struct sock_filter ip_rule;
2127 struct sock_filter return_keep;
2128 struct sock_filter return_dump;
2130 # define FILTER_JUMP_DUMP(here) (u_char)(offsetof(struct interface_filter, return_dump) \
2131 -offsetof(struct interface_filter, here)-sizeof(struct sock_filter)) \
2132 /sizeof(struct sock_filter)
2133 # define FILTER_JUMP_KEEP(here) (u_char)(offsetof(struct interface_filter, return_keep) \
2134 -offsetof(struct interface_filter, here)-sizeof(struct sock_filter)) \
2135 /sizeof(struct sock_filter)
2136 # define FILTER_JUMP_NEXT() (u_char)(0)
2137 # define SKF_NET_DESTIP 16 /* offset in the network header to the destination IP */
2138 static struct interface_filter generic_interface_filter =
2140 /* This filter rule allows incoming packets on the specified interface, which works for all
2141 * remotely generated packets and for locally generated broadcast packets. */
2142 BPF_STMT(BPF_LD+BPF_W+BPF_ABS, SKF_AD_OFF+SKF_AD_IFINDEX),
2143 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, 0xdeadbeef, FILTER_JUMP_KEEP(iface_rule), FILTER_JUMP_NEXT()),
2144 /* This rule allows locally generated packets targeted at the specific IP address of the chosen
2145 * adapter (local packets not destined for the broadcast address do not have IFINDEX set) */
2146 BPF_STMT(BPF_LD+BPF_W+BPF_ABS, SKF_NET_OFF+SKF_NET_DESTIP),
2147 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, 0xdeadbeef, FILTER_JUMP_KEEP(ip_rule), FILTER_JUMP_DUMP(ip_rule)),
2148 BPF_STMT(BPF_RET+BPF_K, (u_int)-1), /* keep packet */
2149 BPF_STMT(BPF_RET+BPF_K, 0) /* dump packet */
2152 static int bind_to_iface_name( int fd, in_addr_t bind_addr, const char *name )
2154 struct interface_filter specific_interface_filter;
2155 struct sock_fprog filter_prog;
2156 static const int enable = 1;
2157 unsigned int index;
2158 in_addr_t ifindex;
2160 if (!setsockopt( fd, SOL_SOCKET, SO_BINDTODEVICE, name, strlen( name ) + 1 ))
2161 return 0;
2163 /* SO_BINDTODEVICE requires NET_CAP_RAW until Linux 5.7. */
2164 if (debug_level)
2165 fprintf( stderr, "setsockopt SO_BINDTODEVICE fd %d, name %s failed: %s, falling back to SO_REUSE_ADDR\n",
2166 fd, name, strerror( errno ));
2168 if (!(index = if_nametoindex( name )))
2169 return -1;
2171 ifindex = htonl( index );
2172 if (setsockopt( fd, IPPROTO_IP, IP_UNICAST_IF, &ifindex, sizeof(ifindex) ) < 0)
2173 return -1;
2175 specific_interface_filter = generic_interface_filter;
2176 specific_interface_filter.iface_rule.k = index;
2177 specific_interface_filter.ip_rule.k = htonl( bind_addr );
2178 filter_prog.len = sizeof(generic_interface_filter) / sizeof(struct sock_filter);
2179 filter_prog.filter = (struct sock_filter *)&specific_interface_filter;
2180 if (setsockopt( fd, SOL_SOCKET, SO_ATTACH_FILTER, &filter_prog, sizeof(filter_prog) ))
2181 return -1;
2183 return setsockopt( fd, SOL_SOCKET, SO_REUSEADDR, &enable, sizeof(enable) );
2186 #else
/* Fallback for platforms without interface binding support: always fails
 * with errno set to EOPNOTSUPP. */
static int bind_to_iface_name( int fd, in_addr_t bind_addr, const char *name )
{
    const int failure = -1;

    errno = EOPNOTSUPP;
    return failure;
}
2194 #endif /* LINUX_BOUND_IF */
2196 /* Take bind() calls on any name corresponding to a local network adapter and
2197 * restrict the given socket to operating only on the specified interface. This
2198 * restriction consists of two components:
2199 * 1) An outgoing packet restriction suggesting the egress interface for all
2200 * packets.
2201 * 2) An incoming packet restriction dropping packets not meant for the
2202 * interface.
2203 * If the function succeeds in placing these restrictions, then the name for the
2204 * bind() may safely be changed to INADDR_ANY, permitting the transmission and
2205 * receipt of broadcast packets on the socket. This behavior is only relevant to
2206 * UDP sockets and is needed for applications that expect to be able to receive
2207 * broadcast packets on a socket that is bound to a specific network interface.
2209 static int bind_to_interface( struct sock *sock, const struct sockaddr_in *addr )
2211 in_addr_t bind_addr = addr->sin_addr.s_addr;
2212 struct ifaddrs *ifaddrs, *ifaddr;
2213 int fd = get_unix_fd( sock->fd );
2214 int err = 0;
2216 if (bind_addr == htonl( INADDR_ANY ) || bind_addr == htonl( INADDR_LOOPBACK ))
2217 return 0;
2218 if (sock->type != WS_SOCK_DGRAM)
2219 return 0;
2221 if (getifaddrs( &ifaddrs ) < 0) return 0;
2223 for (ifaddr = ifaddrs; ifaddr != NULL; ifaddr = ifaddr->ifa_next)
2225 if (ifaddr->ifa_addr && ifaddr->ifa_addr->sa_family == AF_INET
2226 && ((struct sockaddr_in *)ifaddr->ifa_addr)->sin_addr.s_addr == bind_addr)
2228 if ((err = bind_to_iface_name( fd, bind_addr, ifaddr->ifa_name )) < 0)
2230 if (debug_level)
2231 fprintf( stderr, "failed to bind to interface: %s\n", strerror( errno ) );
2233 break;
2236 freeifaddrs( ifaddrs );
2237 return !err;
#ifdef HAVE_STRUCT_SOCKADDR_IN6_SIN6_SCOPE_ID
/* Look up the index of a local interface that carries the given IPv6 address.
 * Walks the getifaddrs() list and returns the if_nametoindex() result for the
 * first matching entry whose index can be resolved. Returns 0 if the list
 * cannot be obtained or no such entry exists. */
static unsigned int get_ipv6_interface_index( const struct in6_addr *addr )
{
    struct ifaddrs *list, *cur;
    unsigned int index = 0;

    if (getifaddrs( &list ) < 0) return 0;

    for (cur = list; cur != NULL; cur = cur->ifa_next)
    {
        if (!cur->ifa_addr || cur->ifa_addr->sa_family != AF_INET6)
            continue;
        if (memcmp( &((struct sockaddr_in6 *)cur->ifa_addr)->sin6_addr, addr, sizeof(*addr) ))
            continue;

        index = if_nametoindex( cur->ifa_name );
        if (index) break;

        /* the lookup failed; keep scanning in case another entry matches */
        if (debug_level)
            fprintf( stderr, "Unable to look up interface index for %s: %s\n",
                     cur->ifa_name, strerror( errno ) );
    }

    freeifaddrs( list );
    return index;
}
#endif
2272 /* return an errno value mapped to a WSA error */
2273 static unsigned int sock_get_error( int err )
2275 switch (err)
2277 case EINTR: return WSAEINTR;
2278 case EBADF: return WSAEBADF;
2279 case EPERM:
2280 case EACCES: return WSAEACCES;
2281 case EFAULT: return WSAEFAULT;
2282 case EINVAL: return WSAEINVAL;
2283 case EMFILE: return WSAEMFILE;
2284 case EINPROGRESS:
2285 case EWOULDBLOCK: return WSAEWOULDBLOCK;
2286 case EALREADY: return WSAEALREADY;
2287 case ENOTSOCK: return WSAENOTSOCK;
2288 case EDESTADDRREQ: return WSAEDESTADDRREQ;
2289 case EMSGSIZE: return WSAEMSGSIZE;
2290 case EPROTOTYPE: return WSAEPROTOTYPE;
2291 case ENOPROTOOPT: return WSAENOPROTOOPT;
2292 case EPROTONOSUPPORT: return WSAEPROTONOSUPPORT;
2293 case ESOCKTNOSUPPORT: return WSAESOCKTNOSUPPORT;
2294 case EOPNOTSUPP: return WSAEOPNOTSUPP;
2295 case EPFNOSUPPORT: return WSAEPFNOSUPPORT;
2296 case EAFNOSUPPORT: return WSAEAFNOSUPPORT;
2297 case EADDRINUSE: return WSAEADDRINUSE;
2298 case EADDRNOTAVAIL: return WSAEADDRNOTAVAIL;
2299 case ENETDOWN: return WSAENETDOWN;
2300 case ENETUNREACH: return WSAENETUNREACH;
2301 case ENETRESET: return WSAENETRESET;
2302 case ECONNABORTED: return WSAECONNABORTED;
2303 case EPIPE:
2304 case ECONNRESET: return WSAECONNRESET;
2305 case ENOBUFS: return WSAENOBUFS;
2306 case EISCONN: return WSAEISCONN;
2307 case ENOTCONN: return WSAENOTCONN;
2308 case ESHUTDOWN: return WSAESHUTDOWN;
2309 case ETOOMANYREFS: return WSAETOOMANYREFS;
2310 case ETIMEDOUT: return WSAETIMEDOUT;
2311 case ECONNREFUSED: return WSAECONNREFUSED;
2312 case ELOOP: return WSAELOOP;
2313 case ENAMETOOLONG: return WSAENAMETOOLONG;
2314 case EHOSTDOWN: return WSAEHOSTDOWN;
2315 case EHOSTUNREACH: return WSAEHOSTUNREACH;
2316 case ENOTEMPTY: return WSAENOTEMPTY;
2317 #ifdef EPROCLIM
2318 case EPROCLIM: return WSAEPROCLIM;
2319 #endif
2320 #ifdef EUSERS
2321 case EUSERS: return WSAEUSERS;
2322 #endif
2323 #ifdef EDQUOT
2324 case EDQUOT: return WSAEDQUOT;
2325 #endif
2326 #ifdef ESTALE
2327 case ESTALE: return WSAESTALE;
2328 #endif
2329 #ifdef EREMOTE
2330 case EREMOTE: return WSAEREMOTE;
2331 #endif
2333 case 0: return 0;
2334 default:
2335 errno = err;
2336 perror("wineserver: sock_get_error() can't map error");
2337 return WSAEFAULT;
/* return an errno value mapped to an NTSTATUS code
 *
 * 0 maps to STATUS_SUCCESS. Any errno value without an entry below is
 * reported through perror() and mapped to STATUS_UNSUCCESSFUL. */
static int sock_get_ntstatus( int err )
{
    switch ( err )
    {
    case EBADF:             return STATUS_INVALID_HANDLE;
    case EBUSY:             return STATUS_DEVICE_BUSY;
    case EPERM:
    case EACCES:            return STATUS_ACCESS_DENIED;
    case EFAULT:            return STATUS_ACCESS_VIOLATION;
    case EINVAL:            return STATUS_INVALID_PARAMETER;
    case ENFILE:
    case EMFILE:            return STATUS_TOO_MANY_OPENED_FILES;
    case EINPROGRESS:
    case EWOULDBLOCK:       return STATUS_DEVICE_NOT_READY;
    case EALREADY:          return STATUS_NETWORK_BUSY;
    case ENOTSOCK:          return STATUS_OBJECT_TYPE_MISMATCH;
    case EDESTADDRREQ:      return STATUS_INVALID_PARAMETER;
    case EMSGSIZE:          return STATUS_BUFFER_OVERFLOW;
    case EPROTONOSUPPORT:
    case ESOCKTNOSUPPORT:
    case EPFNOSUPPORT:
    case EAFNOSUPPORT:
    case EPROTOTYPE:        return STATUS_NOT_SUPPORTED;
    case ENOPROTOOPT:       return STATUS_INVALID_PARAMETER;
    case EOPNOTSUPP:        return STATUS_NOT_SUPPORTED;
    case EADDRINUSE:        return STATUS_SHARING_VIOLATION;
    /* Linux returns ENODEV when specifying an invalid sin6_scope_id;
     * Windows returns STATUS_INVALID_ADDRESS_COMPONENT */
    case ENODEV:
    case EADDRNOTAVAIL:     return STATUS_INVALID_ADDRESS_COMPONENT;
    case ECONNREFUSED:      return STATUS_CONNECTION_REFUSED;
    case ESHUTDOWN:         return STATUS_PIPE_DISCONNECTED;
    case ENOTCONN:          return STATUS_INVALID_CONNECTION;
    case ETIMEDOUT:         return STATUS_IO_TIMEOUT;
    case ENETUNREACH:       return STATUS_NETWORK_UNREACHABLE;
    case EHOSTUNREACH:      return STATUS_HOST_UNREACHABLE;
    case ENETDOWN:          return STATUS_NETWORK_BUSY;
    case EPIPE:
    case ECONNRESET:        return STATUS_CONNECTION_RESET;
    case ECONNABORTED:      return STATUS_CONNECTION_ABORTED;
    case EISCONN:           return STATUS_CONNECTION_ACTIVE;

    case 0:                 return STATUS_SUCCESS;
    default:
        /* perror() reads errno, so hand it the value we could not map */
        errno = err;
        perror("wineserver: sock_get_ntstatus() can't map error");
        return STATUS_UNSUCCESSFUL;
    }
}
2391 static struct accept_req *alloc_accept_req( struct sock *sock, struct sock *acceptsock, struct async *async,
2392 const struct afd_accept_into_params *params )
2394 struct accept_req *req = mem_alloc( sizeof(*req) );
2396 if (req)
2398 req->async = (struct async *)grab_object( async );
2399 req->iosb = async_get_iosb( async );
2400 req->sock = (struct sock *)grab_object( sock );
2401 req->acceptsock = acceptsock;
2402 if (acceptsock) grab_object( acceptsock );
2403 req->accepted = 0;
2404 req->recv_len = 0;
2405 req->local_len = 0;
2406 if (params)
2408 req->recv_len = params->recv_len;
2409 req->local_len = params->local_len;
2412 return req;
2415 static void sock_ioctl( struct fd *fd, ioctl_code_t code, struct async *async )
2417 struct sock *sock = get_fd_user( fd );
2418 int unix_fd = -1;
2420 assert( sock->obj.ops == &sock_ops );
2422 if (code != IOCTL_AFD_WINE_CREATE && code != IOCTL_AFD_POLL && (unix_fd = get_unix_fd( fd )) < 0)
2423 return;
2425 switch(code)
2427 case IOCTL_AFD_WINE_CREATE:
2429 const struct afd_create_params *params = get_req_data();
2431 if (get_req_data_size() != sizeof(*params))
2433 set_error( STATUS_INVALID_PARAMETER );
2434 return;
2436 init_socket( sock, params->family, params->type, params->protocol );
2437 return;
2440 case IOCTL_AFD_WINE_ACCEPT:
2442 struct sock *acceptsock;
2443 obj_handle_t handle;
2445 if (get_reply_max_size() != sizeof(handle))
2447 set_error( STATUS_BUFFER_TOO_SMALL );
2448 return;
2451 if (!(acceptsock = accept_socket( sock )))
2453 struct accept_req *req;
2455 if (sock->nonblocking) return;
2456 if (get_error() != STATUS_DEVICE_NOT_READY) return;
2458 if (!(req = alloc_accept_req( sock, NULL, async, NULL ))) return;
2459 list_add_tail( &sock->accept_list, &req->entry );
2461 async_set_completion_callback( async, free_accept_req, req );
2462 queue_async( &sock->accept_q, async );
2463 sock_reselect( sock );
2464 set_error( STATUS_PENDING );
2465 return;
2467 handle = alloc_handle( current->process, &acceptsock->obj,
2468 GENERIC_READ | GENERIC_WRITE | SYNCHRONIZE, OBJ_INHERIT );
2469 acceptsock->wparam = handle;
2470 sock_reselect( acceptsock );
2471 release_object( acceptsock );
2472 set_reply_data( &handle, sizeof(handle) );
2473 return;
2476 case IOCTL_AFD_WINE_ACCEPT_INTO:
2478 static const int access = FILE_READ_ATTRIBUTES | FILE_WRITE_ATTRIBUTES | FILE_READ_DATA;
2479 const struct afd_accept_into_params *params = get_req_data();
2480 struct sock *acceptsock;
2481 unsigned int remote_len;
2482 struct accept_req *req;
2484 if (get_req_data_size() != sizeof(*params) ||
2485 get_reply_max_size() < params->recv_len ||
2486 get_reply_max_size() - params->recv_len < params->local_len)
2488 set_error( STATUS_BUFFER_TOO_SMALL );
2489 return;
2492 remote_len = get_reply_max_size() - params->recv_len - params->local_len;
2493 if (remote_len < sizeof(int))
2495 set_error( STATUS_INVALID_PARAMETER );
2496 return;
2499 if (!(acceptsock = (struct sock *)get_handle_obj( current->process, params->accept_handle, access, &sock_ops )))
2500 return;
2502 if (acceptsock->accept_recv_req)
2504 release_object( acceptsock );
2505 set_error( STATUS_INVALID_PARAMETER );
2506 return;
2509 if (!(req = alloc_accept_req( sock, acceptsock, async, params )))
2511 release_object( acceptsock );
2512 return;
2514 list_add_tail( &sock->accept_list, &req->entry );
2515 acceptsock->accept_recv_req = req;
2516 release_object( acceptsock );
2518 acceptsock->wparam = params->accept_handle;
2519 async_set_completion_callback( async, free_accept_req, req );
2520 queue_async( &sock->accept_q, async );
2521 sock_reselect( sock );
2522 set_error( STATUS_PENDING );
2523 return;
2526 case IOCTL_AFD_LISTEN:
2528 const struct afd_listen_params *params = get_req_data();
2530 if (get_req_data_size() < sizeof(*params))
2532 set_error( STATUS_INVALID_PARAMETER );
2533 return;
2536 if (!sock->bound)
2538 set_error( STATUS_INVALID_PARAMETER );
2539 return;
2542 if (listen( unix_fd, params->backlog ) < 0)
2544 set_error( sock_get_ntstatus( errno ) );
2545 return;
2548 sock->state = SOCK_LISTENING;
2550 /* a listening socket can no longer be accepted into */
2551 allow_fd_caching( sock->fd );
2553 /* we may already be selecting for AFD_POLL_ACCEPT */
2554 sock_reselect( sock );
2555 return;
2558 case IOCTL_AFD_WINE_CONNECT:
2560 const struct afd_connect_params *params = get_req_data();
2561 const struct WS_sockaddr *addr;
2562 union unix_sockaddr unix_addr;
2563 struct connect_req *req;
2564 socklen_t unix_len;
2565 int send_len, ret;
2567 if (get_req_data_size() < sizeof(*params) ||
2568 get_req_data_size() - sizeof(*params) < params->addr_len)
2570 set_error( STATUS_BUFFER_TOO_SMALL );
2571 return;
2573 send_len = get_req_data_size() - sizeof(*params) - params->addr_len;
2574 addr = (const struct WS_sockaddr *)(params + 1);
2576 if (!params->synchronous && !sock->bound)
2578 set_error( STATUS_INVALID_PARAMETER );
2579 return;
2582 if (sock->accept_recv_req)
2584 set_error( STATUS_INVALID_PARAMETER );
2585 return;
2588 if (sock->connect_req)
2590 set_error( STATUS_INVALID_PARAMETER );
2591 return;
2594 switch (sock->state)
2596 case SOCK_LISTENING:
2597 set_error( STATUS_INVALID_PARAMETER );
2598 return;
2600 case SOCK_CONNECTING:
2601 /* FIXME: STATUS_ADDRESS_ALREADY_ASSOCIATED probably isn't right,
2602 * but there's no status code that maps to WSAEALREADY... */
2603 set_error( params->synchronous ? STATUS_ADDRESS_ALREADY_ASSOCIATED : STATUS_INVALID_PARAMETER );
2604 return;
2606 case SOCK_CONNECTED:
2607 set_error( STATUS_CONNECTION_ACTIVE );
2608 return;
2610 case SOCK_UNCONNECTED:
2611 case SOCK_CONNECTIONLESS:
2612 break;
2615 unix_len = sockaddr_to_unix( addr, params->addr_len, &unix_addr );
2616 if (!unix_len)
2618 set_error( STATUS_INVALID_ADDRESS );
2619 return;
2621 if (unix_addr.addr.sa_family == AF_INET && !memcmp( &unix_addr.in.sin_addr, magic_loopback_addr, 4 ))
2622 unix_addr.in.sin_addr.s_addr = htonl( INADDR_LOOPBACK );
2624 ret = connect( unix_fd, &unix_addr.addr, unix_len );
2625 if (ret < 0 && errno == ECONNABORTED)
2627 /* On Linux with nonblocking socket if the previous connect() failed for any reason (including
2628 * timeout), next connect will fail. If the error code was queried by getsockopt( SO_ERROR )
2629 * the error code returned now is ECONNABORTED (otherwise that is the actual connect() failure
2630 * error code). If we got here after previous connect attempt on the socket that means
2631 * we already queried SO_ERROR in sock_error(), so retrying on ECONNABORTED only is
2632 * sufficient. */
2633 ret = connect( unix_fd, &unix_addr.addr, unix_len );
2636 if (ret < 0 && errno != EINPROGRESS)
2638 set_error( sock_get_ntstatus( errno ) );
2639 return;
2642 /* a connected or connecting socket can no longer be accepted into */
2643 allow_fd_caching( sock->fd );
2645 unix_len = sizeof(unix_addr);
2646 if (!getsockname( unix_fd, &unix_addr.addr, &unix_len ))
2647 sock->addr_len = sockaddr_from_unix( &unix_addr, &sock->addr.addr, sizeof(sock->addr) );
2648 sock->bound = 1;
2650 if (!ret)
2652 if (sock->type != WS_SOCK_DGRAM)
2654 sock->state = SOCK_CONNECTED;
2655 sock->connect_time = current_time;
2658 if (!send_len) return;
2661 if (sock->type != WS_SOCK_DGRAM)
2662 sock->state = SOCK_CONNECTING;
2664 if (params->synchronous && sock->nonblocking)
2666 sock_reselect( sock );
2667 set_error( STATUS_DEVICE_NOT_READY );
2668 return;
2671 if (!(req = mem_alloc( sizeof(*req) )))
2672 return;
2674 req->async = (struct async *)grab_object( async );
2675 req->iosb = async_get_iosb( async );
2676 req->sock = (struct sock *)grab_object( sock );
2677 req->addr_len = params->addr_len;
2678 req->send_len = send_len;
2679 req->send_cursor = 0;
2681 async_set_completion_callback( async, free_connect_req, req );
2682 sock->connect_req = req;
2683 queue_async( &sock->connect_q, async );
2684 sock_reselect( sock );
2685 set_error( STATUS_PENDING );
2686 return;
2689 case IOCTL_AFD_WINE_SHUTDOWN:
2691 unsigned int how;
2693 if (get_req_data_size() < sizeof(int))
2695 set_error( STATUS_BUFFER_TOO_SMALL );
2696 return;
2698 how = *(int *)get_req_data();
2700 if (how > SD_BOTH)
2702 set_error( STATUS_INVALID_PARAMETER );
2703 return;
2706 if (sock->state != SOCK_CONNECTED && sock->state != SOCK_CONNECTIONLESS)
2708 set_error( STATUS_INVALID_CONNECTION );
2709 return;
2712 if (how != SD_SEND)
2714 sock->rd_shutdown = 1;
2716 if (how != SD_RECEIVE)
2718 sock->wr_shutdown = 1;
2719 if (list_empty( &sock->write_q.queue ))
2720 shutdown( unix_fd, SHUT_WR );
2721 else
2722 sock->wr_shutdown_pending = 1;
2725 if (how == SD_BOTH)
2727 if (sock->event) release_object( sock->event );
2728 sock->event = NULL;
2729 sock->window = 0;
2730 sock->mask = 0;
2731 sock->nonblocking = 1;
2734 sock_reselect( sock );
2735 return;
2738 case IOCTL_AFD_WINE_ADDRESS_LIST_CHANGE:
2740 int force_async;
2742 if (get_req_data_size() < sizeof(int))
2744 set_error( STATUS_BUFFER_TOO_SMALL );
2745 return;
2747 force_async = *(int *)get_req_data();
2749 if (sock->nonblocking && !force_async)
2751 set_error( STATUS_DEVICE_NOT_READY );
2752 return;
2754 if (!sock_get_ifchange( sock )) return;
2755 queue_async( &sock->ifchange_q, async );
2756 set_error( STATUS_PENDING );
2757 return;
2760 case IOCTL_AFD_WINE_FIONBIO:
2761 if (get_req_data_size() < sizeof(int))
2763 set_error( STATUS_BUFFER_TOO_SMALL );
2764 return;
2766 if (*(int *)get_req_data())
2768 sock->nonblocking = 1;
2770 else
2772 if (sock->mask)
2774 set_error( STATUS_INVALID_PARAMETER );
2775 return;
2777 sock->nonblocking = 0;
2779 return;
2781 case IOCTL_AFD_EVENT_SELECT:
2783 struct event *event = NULL;
2784 obj_handle_t event_handle;
2785 int mask;
2787 set_async_pending( async );
2789 if (is_machine_64bit( current->process->machine ))
2791 const struct afd_event_select_params_64 *params = get_req_data();
2793 if (get_req_data_size() < sizeof(*params))
2795 set_error( STATUS_INVALID_PARAMETER );
2796 return;
2799 event_handle = params->event;
2800 mask = params->mask;
2802 else
2804 const struct afd_event_select_params_32 *params = get_req_data();
2806 if (get_req_data_size() < sizeof(*params))
2808 set_error( STATUS_INVALID_PARAMETER );
2809 return;
2812 event_handle = params->event;
2813 mask = params->mask;
2816 if ((event_handle || mask) &&
2817 !(event = get_event_obj( current->process, event_handle, EVENT_MODIFY_STATE )))
2819 set_error( STATUS_INVALID_PARAMETER );
2820 return;
2823 if (sock->event) release_object( sock->event );
2824 sock->event = event;
2825 sock->mask = mask;
2826 sock->window = 0;
2827 sock->message = 0;
2828 sock->wparam = 0;
2829 sock->nonblocking = 1;
2831 sock_reselect( sock );
2832 /* Explicitly wake the socket up if the mask causes it to become
2833 * signaled. Note that reselecting isn't enough, since we might already
2834 * have had events recorded in sock->reported_events and we don't want
2835 * to select for them again. */
2836 sock_wake_up( sock );
2838 return;
2841 case IOCTL_AFD_WINE_MESSAGE_SELECT:
2843 const struct afd_message_select_params *params = get_req_data();
2845 if (get_req_data_size() < sizeof(params))
2847 set_error( STATUS_BUFFER_TOO_SMALL );
2848 return;
2851 if (sock->event) release_object( sock->event );
2853 if (params->window)
2855 sock->pending_events = 0;
2856 sock->reported_events = 0;
2858 sock->event = NULL;
2859 sock->mask = params->mask;
2860 sock->window = params->window;
2861 sock->message = params->message;
2862 sock->wparam = params->handle;
2863 sock->nonblocking = 1;
2865 sock_reselect( sock );
2867 return;
2870 case IOCTL_AFD_BIND:
2872 const struct afd_bind_params *params = get_req_data();
2873 union unix_sockaddr unix_addr, bind_addr;
2874 data_size_t in_size;
2875 socklen_t unix_len;
2876 int v6only = 1;
2878 /* the ioctl is METHOD_NEITHER, so ntdll gives us the output buffer as
2879 * input */
2880 if (get_req_data_size() < get_reply_max_size())
2882 set_error( STATUS_BUFFER_TOO_SMALL );
2883 return;
2885 in_size = get_req_data_size() - get_reply_max_size();
2886 if (in_size < offsetof(struct afd_bind_params, addr.sa_data)
2887 || get_reply_max_size() < in_size - sizeof(int))
2889 set_error( STATUS_INVALID_PARAMETER );
2890 return;
2893 if (sock->bound)
2895 set_error( STATUS_ADDRESS_ALREADY_ASSOCIATED );
2896 return;
2899 unix_len = sockaddr_to_unix( &params->addr, in_size - sizeof(int), &unix_addr );
2900 if (!unix_len)
2902 set_error( STATUS_INVALID_ADDRESS );
2903 return;
2905 bind_addr = unix_addr;
2907 if (unix_addr.addr.sa_family == AF_INET)
2909 if (!memcmp( &unix_addr.in.sin_addr, magic_loopback_addr, 4 )
2910 || bind_to_interface( sock, &unix_addr.in ))
2911 bind_addr.in.sin_addr.s_addr = htonl( INADDR_ANY );
2913 else if (unix_addr.addr.sa_family == AF_INET6)
2915 #ifdef HAVE_STRUCT_SOCKADDR_IN6_SIN6_SCOPE_ID
2916 /* Windows allows specifying zero to use the default scope. Linux
2917 * interprets it as an interface index and requires that it be
2918 * nonzero. */
2919 if (!unix_addr.in6.sin6_scope_id)
2920 bind_addr.in6.sin6_scope_id = get_ipv6_interface_index( &unix_addr.in6.sin6_addr );
2921 #endif
2924 set_async_pending( async );
2926 #ifdef IPV6_V6ONLY
2927 if (sock->family == WS_AF_INET6)
2929 socklen_t len = sizeof(v6only);
2931 getsockopt( get_unix_fd(sock->fd), IPPROTO_IPV6, IPV6_V6ONLY, &v6only, &len );
2933 #endif
2935 if (check_addr_usage( sock, &bind_addr, v6only ))
2936 return;
2938 if (bind( unix_fd, &bind_addr.addr, unix_len ) < 0)
2940 if (errno == EADDRINUSE && sock->reuseaddr)
2941 errno = EACCES;
2943 set_error( sock_get_ntstatus( errno ) );
2944 return;
2947 sock->bound = 1;
2949 unix_len = sizeof(bind_addr);
2950 if (!getsockname( unix_fd, &bind_addr.addr, &unix_len ))
2952 /* store the interface or magic loopback address instead of the
2953 * actual unix address */
2954 if (bind_addr.addr.sa_family == AF_INET)
2955 bind_addr.in.sin_addr = unix_addr.in.sin_addr;
2956 sock->addr_len = sockaddr_from_unix( &bind_addr, &sock->addr.addr, sizeof(sock->addr) );
2959 update_addr_usage( sock, &bind_addr, v6only );
2961 if (get_reply_max_size() >= sock->addr_len)
2962 set_reply_data( &sock->addr, sock->addr_len );
2963 return;
2966 case IOCTL_AFD_GETSOCKNAME:
2967 if (!sock->bound)
2969 set_error( STATUS_INVALID_PARAMETER );
2970 return;
2973 if (get_reply_max_size() < sock->addr_len)
2975 set_error( STATUS_BUFFER_TOO_SMALL );
2976 return;
2979 set_reply_data( &sock->addr, sock->addr_len );
2980 return;
2982 case IOCTL_AFD_WINE_DEFER:
2984 const obj_handle_t *handle = get_req_data();
2985 struct sock *acceptsock;
2987 if (get_req_data_size() < sizeof(*handle))
2989 set_error( STATUS_BUFFER_TOO_SMALL );
2990 return;
2993 acceptsock = (struct sock *)get_handle_obj( current->process, *handle, 0, &sock_ops );
2994 if (!acceptsock) return;
2996 sock->deferred = acceptsock;
2997 return;
3000 case IOCTL_AFD_WINE_GET_INFO:
3002 struct afd_get_info_params params;
3004 if (get_reply_max_size() < sizeof(params))
3006 set_error( STATUS_BUFFER_TOO_SMALL );
3007 return;
3010 params.family = sock->family;
3011 params.type = sock->type;
3012 params.protocol = sock->proto;
3013 set_reply_data( &params, sizeof(params) );
3014 return;
3017 case IOCTL_AFD_WINE_GET_SO_ACCEPTCONN:
3019 int listening = (sock->state == SOCK_LISTENING);
3021 if (get_reply_max_size() < sizeof(listening))
3023 set_error( STATUS_BUFFER_TOO_SMALL );
3024 return;
3027 set_reply_data( &listening, sizeof(listening) );
3028 return;
3031 case IOCTL_AFD_WINE_GET_SO_ERROR:
3033 int error;
3034 unsigned int i;
3036 if (get_reply_max_size() < sizeof(error))
3038 set_error( STATUS_BUFFER_TOO_SMALL );
3039 return;
3042 error = sock_error( sock );
3043 if (!error)
3045 for (i = 0; i < ARRAY_SIZE( sock->errors ); ++i)
3047 if (sock->errors[i])
3049 error = sock->errors[i];
3050 break;
3055 error = sock_get_error( error );
3056 set_reply_data( &error, sizeof(error) );
3057 return;
3060 case IOCTL_AFD_WINE_GET_SO_RCVBUF:
3062 int rcvbuf = sock->rcvbuf;
3064 if (get_reply_max_size() < sizeof(rcvbuf))
3066 set_error( STATUS_BUFFER_TOO_SMALL );
3067 return;
3070 set_reply_data( &rcvbuf, sizeof(rcvbuf) );
3071 return;
3074 case IOCTL_AFD_WINE_SET_SO_RCVBUF:
3076 DWORD rcvbuf, set_rcvbuf;
3078 if (get_req_data_size() < sizeof(rcvbuf))
3080 set_error( STATUS_BUFFER_TOO_SMALL );
3081 return;
3083 rcvbuf = *(DWORD *)get_req_data();
3084 set_rcvbuf = max( rcvbuf, MIN_RCVBUF );
3086 if (!setsockopt( unix_fd, SOL_SOCKET, SO_RCVBUF, (char *)&set_rcvbuf, sizeof(set_rcvbuf) ))
3087 sock->rcvbuf = rcvbuf;
3088 else
3089 set_error( sock_get_ntstatus( errno ) );
3090 return;
3093 case IOCTL_AFD_WINE_GET_SO_RCVTIMEO:
3095 DWORD rcvtimeo = sock->rcvtimeo;
3097 if (get_reply_max_size() < sizeof(rcvtimeo))
3099 set_error( STATUS_BUFFER_TOO_SMALL );
3100 return;
3103 set_reply_data( &rcvtimeo, sizeof(rcvtimeo) );
3104 return;
3107 case IOCTL_AFD_WINE_SET_SO_RCVTIMEO:
3109 DWORD rcvtimeo;
3111 if (get_req_data_size() < sizeof(rcvtimeo))
3113 set_error( STATUS_BUFFER_TOO_SMALL );
3114 return;
3116 rcvtimeo = *(DWORD *)get_req_data();
3118 sock->rcvtimeo = rcvtimeo;
3119 return;
3122 /* BSD socket SO_REUSEADDR is not compatible with winsock semantics. */
3123 case IOCTL_AFD_WINE_SET_SO_REUSEADDR:
3125 int reuse, ret;
3127 if (get_req_data_size() < sizeof(reuse))
3129 set_error( STATUS_BUFFER_TOO_SMALL );
3130 return;
3133 reuse = *(int *)get_req_data();
3135 if (reuse && sock->exclusiveaddruse)
3137 set_error( STATUS_INVALID_PARAMETER );
3138 return;
3141 if (is_tcp_socket( sock ))
3142 ret = 0;
3143 else
3144 ret = setsockopt( unix_fd, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof(reuse) );
3145 #ifdef __APPLE__
3146 if (!ret) ret = setsockopt( unix_fd, SOL_SOCKET, SO_REUSEPORT, &reuse, sizeof(reuse) );
3147 #endif
3148 if (ret)
3149 set_error( sock_get_ntstatus( errno ) );
3150 else
3151 sock->reuseaddr = !!reuse;
3152 return;
3155 case IOCTL_AFD_WINE_SET_SO_EXCLUSIVEADDRUSE:
3157 int exclusive;
3159 if (get_req_data_size() < sizeof(exclusive))
3161 set_error( STATUS_BUFFER_TOO_SMALL );
3162 return;
3165 exclusive = *(int *)get_req_data();
3166 if (exclusive && sock->reuseaddr)
3168 set_error( STATUS_INVALID_PARAMETER );
3169 return;
3171 sock->exclusiveaddruse = !!exclusive;
3172 return;
3175 case IOCTL_AFD_WINE_GET_SO_SNDBUF:
3177 int sndbuf = sock->sndbuf;
3179 if (get_reply_max_size() < sizeof(sndbuf))
3181 set_error( STATUS_BUFFER_TOO_SMALL );
3182 return;
3185 set_reply_data( &sndbuf, sizeof(sndbuf) );
3186 return;
3189 case IOCTL_AFD_WINE_SET_SO_SNDBUF:
3191 DWORD sndbuf;
3193 if (get_req_data_size() < sizeof(sndbuf))
3195 set_error( STATUS_BUFFER_TOO_SMALL );
3196 return;
3198 sndbuf = *(DWORD *)get_req_data();
3200 #ifdef __APPLE__
3201 if (!sndbuf)
3203 /* setsockopt fails if a zero value is passed */
3204 sock->sndbuf = sndbuf;
3205 return;
3207 #endif
3209 if (!setsockopt( unix_fd, SOL_SOCKET, SO_SNDBUF, (char *)&sndbuf, sizeof(sndbuf) ))
3210 sock->sndbuf = sndbuf;
3211 else
3212 set_error( sock_get_ntstatus( errno ) );
3213 return;
3216 case IOCTL_AFD_WINE_GET_SO_SNDTIMEO:
3218 DWORD sndtimeo = sock->sndtimeo;
3220 if (get_reply_max_size() < sizeof(sndtimeo))
3222 set_error( STATUS_BUFFER_TOO_SMALL );
3223 return;
3226 set_reply_data( &sndtimeo, sizeof(sndtimeo) );
3227 return;
3230 case IOCTL_AFD_WINE_SET_SO_SNDTIMEO:
3232 DWORD sndtimeo;
3234 if (get_req_data_size() < sizeof(sndtimeo))
3236 set_error( STATUS_BUFFER_TOO_SMALL );
3237 return;
3239 sndtimeo = *(DWORD *)get_req_data();
3241 sock->sndtimeo = sndtimeo;
3242 return;
3245 case IOCTL_AFD_WINE_GET_SO_CONNECT_TIME:
3247 DWORD time = ~0u;
3249 if (get_reply_max_size() < sizeof(time))
3251 set_error( STATUS_BUFFER_TOO_SMALL );
3252 return;
3255 if (sock->state == SOCK_CONNECTED)
3256 time = (current_time - sock->connect_time) / 10000000;
3258 set_reply_data( &time, sizeof(time) );
3259 return;
3262 case IOCTL_AFD_WINE_GET_SO_REUSEADDR:
3264 int reuse;
3266 if (!get_reply_max_size())
3268 set_error( STATUS_BUFFER_TOO_SMALL );
3269 return;
3272 reuse = sock->reuseaddr;
3273 set_reply_data( &reuse, min( sizeof(reuse), get_reply_max_size() ));
3274 return;
3277 case IOCTL_AFD_WINE_GET_SO_EXCLUSIVEADDRUSE:
3279 int exclusive;
3281 if (!get_reply_max_size())
3283 set_error( STATUS_BUFFER_TOO_SMALL );
3284 return;
3287 exclusive = sock->exclusiveaddruse;
3288 set_reply_data( &exclusive, min( sizeof(exclusive), get_reply_max_size() ));
3289 return;
3292 case IOCTL_AFD_POLL:
3294 if (get_reply_max_size() < get_req_data_size())
3296 set_error( STATUS_INVALID_PARAMETER );
3297 return;
3300 if (is_machine_64bit( current->process->machine ))
3302 const struct afd_poll_params_64 *params = get_req_data();
3304 if (get_req_data_size() < sizeof(struct afd_poll_params_64) ||
3305 get_req_data_size() < offsetof( struct afd_poll_params_64, sockets[params->count] ))
3307 set_error( STATUS_INVALID_PARAMETER );
3308 return;
3311 poll_socket( sock, async, params->exclusive, params->timeout, params->count, params->sockets );
3313 else
3315 const struct afd_poll_params_32 *params = get_req_data();
3316 struct afd_poll_socket_64 *sockets;
3317 unsigned int i;
3319 if (get_req_data_size() < sizeof(struct afd_poll_params_32) ||
3320 get_req_data_size() < offsetof( struct afd_poll_params_32, sockets[params->count] ))
3322 set_error( STATUS_INVALID_PARAMETER );
3323 return;
3326 if (!(sockets = mem_alloc( params->count * sizeof(*sockets) ))) return;
3327 for (i = 0; i < params->count; ++i)
3329 sockets[i].socket = params->sockets[i].socket;
3330 sockets[i].flags = params->sockets[i].flags;
3331 sockets[i].status = params->sockets[i].status;
3334 poll_socket( sock, async, params->exclusive, params->timeout, params->count, sockets );
3335 free( sockets );
3338 return;
3341 default:
3342 set_error( STATUS_NOT_SUPPORTED );
3343 return;
3347 static void handle_exclusive_poll(struct poll_req *req)
3349 unsigned int i;
3351 for (i = 0; i < req->count; ++i)
3353 struct sock *sock = req->sockets[i].sock;
3354 struct poll_req *main_poll = sock->main_poll;
3356 if (main_poll && main_poll->exclusive && req->exclusive)
3358 complete_async_poll( main_poll, STATUS_SUCCESS );
3359 main_poll = NULL;
3362 if (!main_poll)
3363 sock->main_poll = req;
3367 static void poll_socket( struct sock *poll_sock, struct async *async, int exclusive, timeout_t timeout,
3368 unsigned int count, const struct afd_poll_socket_64 *sockets )
3370 BOOL signaled = FALSE;
3371 struct poll_req *req;
3372 unsigned int i, j;
3374 if (!count)
3376 set_error( STATUS_INVALID_PARAMETER );
3377 return;
3380 if (!(req = mem_alloc( offsetof( struct poll_req, sockets[count] ) )))
3381 return;
3383 req->timeout = NULL;
3384 req->pending = 0;
3385 if (timeout && timeout != TIMEOUT_INFINITE &&
3386 !(req->timeout = add_timeout_user( timeout, async_poll_timeout, req )))
3388 free( req );
3389 return;
3391 req->orig_timeout = timeout;
3393 for (i = 0; i < count; ++i)
3395 req->sockets[i].sock = (struct sock *)get_handle_obj( current->process, sockets[i].socket, 0, &sock_ops );
3396 if (!req->sockets[i].sock)
3398 for (j = 0; j < i; ++j) release_object( req->sockets[j].sock );
3399 if (req->timeout) remove_timeout_user( req->timeout );
3400 free( req );
3401 return;
3403 req->sockets[i].handle = sockets[i].socket;
3404 req->sockets[i].mask = sockets[i].flags;
3405 req->sockets[i].flags = 0;
3408 req->exclusive = exclusive;
3409 req->count = count;
3410 req->async = (struct async *)grab_object( async );
3411 req->iosb = async_get_iosb( async );
3413 handle_exclusive_poll(req);
3415 list_add_tail( &poll_list, &req->entry );
3416 async_set_completion_callback( async, free_poll_req, req );
3417 queue_async( &poll_sock->poll_q, async );
3419 for (i = 0; i < count; ++i)
3421 struct sock *sock = req->sockets[i].sock;
3422 int mask = req->sockets[i].mask;
3423 struct pollfd pollfd;
3425 pollfd.fd = get_unix_fd( sock->fd );
3426 pollfd.events = poll_flags_from_afd( sock, mask );
3427 if (pollfd.events >= 0 && poll( &pollfd, 1, 0 ) >= 0)
3428 sock_poll_event( sock->fd, pollfd.revents );
3430 /* FIXME: do other error conditions deserve a similar treatment? */
3431 if (sock->state != SOCK_CONNECTING && sock->errors[AFD_POLL_BIT_CONNECT_ERR] && (mask & AFD_POLL_CONNECT_ERR))
3433 req->sockets[i].flags |= AFD_POLL_CONNECT_ERR;
3434 req->sockets[i].status = sock_get_ntstatus( sock->errors[AFD_POLL_BIT_CONNECT_ERR] );
3437 if (req->sockets[i].flags)
3438 signaled = TRUE;
3441 if (!timeout || signaled)
3442 complete_async_poll( req, STATUS_SUCCESS );
3443 else
3444 req->pending = 1;
3446 for (i = 0; i < req->count; ++i)
3447 sock_reselect( req->sockets[i].sock );
3448 set_error( STATUS_PENDING );
3451 #ifdef HAVE_LINUX_RTNETLINK_H
/* only keep one ifchange object around, all sockets waiting for wakeups will look to it */
static struct object *ifchange_object;

/* object operations */
static void ifchange_dump( struct object *obj, int verbose );
static struct fd *ifchange_get_fd( struct object *obj );
static void ifchange_destroy( struct object *obj );

/* fd operations */
static int ifchange_get_poll_events( struct fd *fd );
static void ifchange_poll_event( struct fd *fd, int event );
3463 struct ifchange
3465 struct object obj; /* object header */
3466 struct fd *fd; /* interface change file descriptor */
3467 struct list sockets; /* list of sockets to send interface change notifications */
3470 static const struct object_ops ifchange_ops =
3472 sizeof(struct ifchange), /* size */
3473 &no_type, /* type */
3474 ifchange_dump, /* dump */
3475 no_add_queue, /* add_queue */
3476 NULL, /* remove_queue */
3477 NULL, /* signaled */
3478 no_satisfied, /* satisfied */
3479 no_signal, /* signal */
3480 ifchange_get_fd, /* get_fd */
3481 default_map_access, /* map_access */
3482 default_get_sd, /* get_sd */
3483 default_set_sd, /* set_sd */
3484 no_get_full_name, /* get_full_name */
3485 no_lookup_name, /* lookup_name */
3486 no_link_name, /* link_name */
3487 NULL, /* unlink_name */
3488 no_open_file, /* open_file */
3489 no_kernel_obj_list, /* get_kernel_obj_list */
3490 no_close_handle, /* close_handle */
3491 ifchange_destroy /* destroy */
3494 static const struct fd_ops ifchange_fd_ops =
3496 ifchange_get_poll_events, /* get_poll_events */
3497 ifchange_poll_event, /* poll_event */
3498 NULL, /* get_fd_type */
3499 no_fd_read, /* read */
3500 no_fd_write, /* write */
3501 no_fd_flush, /* flush */
3502 no_fd_get_file_info, /* get_file_info */
3503 no_fd_get_volume_info, /* get_volume_info */
3504 no_fd_ioctl, /* ioctl */
3505 NULL, /* cancel_async */
3506 NULL, /* queue_async */
3507 NULL /* reselect_async */
3510 static void ifchange_dump( struct object *obj, int verbose )
3512 assert( obj->ops == &ifchange_ops );
3513 fprintf( stderr, "Interface change\n" );
3516 static struct fd *ifchange_get_fd( struct object *obj )
3518 struct ifchange *ifchange = (struct ifchange *)obj;
3519 return (struct fd *)grab_object( ifchange->fd );
3522 static void ifchange_destroy( struct object *obj )
3524 struct ifchange *ifchange = (struct ifchange *)obj;
3525 assert( obj->ops == &ifchange_ops );
3527 release_object( ifchange->fd );
3529 /* reset the global ifchange object so that it will be recreated if it is needed again */
3530 assert( obj == ifchange_object );
3531 ifchange_object = NULL;
/* get_poll_events callback: the notifier only ever waits for readable data */
static int ifchange_get_poll_events( struct fd *fd )
{
    const int wanted = POLLIN;

    return wanted;
}
3539 /* wake up all the sockets waiting for a change notification event */
3540 static void ifchange_wake_up( struct object *obj, unsigned int status )
3542 struct ifchange *ifchange = (struct ifchange *)obj;
3543 struct list *ptr, *next;
3544 assert( obj->ops == &ifchange_ops );
3545 assert( obj == ifchange_object );
3547 LIST_FOR_EACH_SAFE( ptr, next, &ifchange->sockets )
3549 struct sock *sock = LIST_ENTRY( ptr, struct sock, ifchange_entry );
3551 assert( sock->ifchange_obj );
3552 async_wake_up( &sock->ifchange_q, status ); /* issue ifchange notification for the socket */
3553 sock_release_ifchange( sock ); /* remove socket from list and decrement ifchange refcount */
3557 static void ifchange_poll_event( struct fd *fd, int event )
3559 struct object *ifchange = get_fd_user( fd );
3560 unsigned int status = STATUS_PENDING;
3561 char buffer[PIPE_BUF];
3562 int r;
3564 r = recv( get_unix_fd(fd), buffer, sizeof(buffer), MSG_DONTWAIT );
3565 if (r < 0)
3567 if (errno == EWOULDBLOCK || (EWOULDBLOCK != EAGAIN && errno == EAGAIN))
3568 return; /* retry when poll() says the socket is ready */
3569 status = sock_get_ntstatus( errno );
3571 else if (r > 0)
3573 struct nlmsghdr *nlh;
3575 for (nlh = (struct nlmsghdr *)buffer; NLMSG_OK(nlh, r); nlh = NLMSG_NEXT(nlh, r))
3577 if (nlh->nlmsg_type == NLMSG_DONE)
3578 break;
3579 if (nlh->nlmsg_type == RTM_NEWADDR || nlh->nlmsg_type == RTM_DELADDR)
3580 status = STATUS_SUCCESS;
3583 else status = STATUS_CANCELLED;
3585 if (status != STATUS_PENDING) ifchange_wake_up( ifchange, status );
3588 #endif
3590 /* we only need one of these interface notification objects, all of the sockets dependent upon
3591 * it will wake up when a notification event occurs */
3592 static struct object *get_ifchange( void )
3594 #ifdef HAVE_LINUX_RTNETLINK_H
3595 struct ifchange *ifchange;
3596 struct sockaddr_nl addr;
3597 int unix_fd;
3599 if (ifchange_object)
3601 /* increment the refcount for each socket that uses the ifchange object */
3602 return grab_object( ifchange_object );
3605 /* create the socket we need for processing interface change notifications */
3606 unix_fd = socket( PF_NETLINK, SOCK_RAW, NETLINK_ROUTE );
3607 if (unix_fd == -1)
3609 set_error( sock_get_ntstatus( errno ));
3610 return NULL;
3612 fcntl( unix_fd, F_SETFL, O_NONBLOCK ); /* make socket nonblocking */
3613 memset( &addr, 0, sizeof(addr) );
3614 addr.nl_family = AF_NETLINK;
3615 addr.nl_groups = RTMGRP_IPV4_IFADDR;
3616 /* bind the socket to the special netlink kernel interface */
3617 if (bind( unix_fd, (struct sockaddr *)&addr, sizeof(addr) ) == -1)
3619 close( unix_fd );
3620 set_error( sock_get_ntstatus( errno ));
3621 return NULL;
3623 if (!(ifchange = alloc_object( &ifchange_ops )))
3625 close( unix_fd );
3626 set_error( STATUS_NO_MEMORY );
3627 return NULL;
3629 list_init( &ifchange->sockets );
3630 if (!(ifchange->fd = create_anonymous_fd( &ifchange_fd_ops, unix_fd, &ifchange->obj, 0 )))
3632 release_object( ifchange );
3633 set_error( STATUS_NO_MEMORY );
3634 return NULL;
3636 set_fd_events( ifchange->fd, POLLIN ); /* enable read wakeup on the file descriptor */
3638 /* the ifchange object is now successfully configured */
3639 ifchange_object = &ifchange->obj;
3640 return &ifchange->obj;
3641 #else
3642 set_error( STATUS_NOT_SUPPORTED );
3643 return NULL;
3644 #endif
/* append the socket to the notifier's list of sockets to wake up;
 * does nothing when rtnetlink support is not compiled in */
static void ifchange_add_sock( struct object *obj, struct sock *sock )
{
#ifdef HAVE_LINUX_RTNETLINK_H
    struct ifchange *notifier = (struct ifchange *)obj;
    struct list *waiters = &notifier->sockets;

    list_add_tail( waiters, &sock->ifchange_entry );
#endif
}
3657 /* create a new ifchange queue for a specific socket or, if one already exists, reuse the existing one */
3658 static struct object *sock_get_ifchange( struct sock *sock )
3660 struct object *ifchange;
3662 if (sock->ifchange_obj) /* reuse existing ifchange_obj for this socket */
3663 return sock->ifchange_obj;
3665 if (!(ifchange = get_ifchange()))
3666 return NULL;
3668 /* add the socket to the ifchange notification list */
3669 ifchange_add_sock( ifchange, sock );
3670 sock->ifchange_obj = ifchange;
3671 return ifchange;
3674 /* destroy an existing ifchange queue for a specific socket */
3675 static void sock_release_ifchange( struct sock *sock )
3677 if (sock->ifchange_obj)
3679 list_remove( &sock->ifchange_entry );
3680 release_object( sock->ifchange_obj );
3681 sock->ifchange_obj = NULL;
/* forward declarations: object callbacks of the socket device */
static void socket_device_dump( struct object *obj, int verbose );
static struct object *socket_device_lookup_name( struct object *obj, struct unicode_str *name,
                                                 unsigned int attr, struct object *root );
static struct object *socket_device_open_file( struct object *obj, unsigned int access,
                                               unsigned int sharing, unsigned int options );
3691 static const struct object_ops socket_device_ops =
3693 sizeof(struct object), /* size */
3694 &device_type, /* type */
3695 socket_device_dump, /* dump */
3696 no_add_queue, /* add_queue */
3697 NULL, /* remove_queue */
3698 NULL, /* signaled */
3699 no_satisfied, /* satisfied */
3700 no_signal, /* signal */
3701 no_get_fd, /* get_fd */
3702 default_map_access, /* map_access */
3703 default_get_sd, /* get_sd */
3704 default_set_sd, /* set_sd */
3705 default_get_full_name, /* get_full_name */
3706 socket_device_lookup_name, /* lookup_name */
3707 directory_link_name, /* link_name */
3708 default_unlink_name, /* unlink_name */
3709 socket_device_open_file, /* open_file */
3710 no_kernel_obj_list, /* get_kernel_obj_list */
3711 no_close_handle, /* close_handle */
3712 no_destroy /* destroy */
/* dump callback: write a one-line description of the device to stderr */
static void socket_device_dump( struct object *obj, int verbose )
{
    fprintf( stderr, "Socket device\n" );
}
3720 static struct object *socket_device_lookup_name( struct object *obj, struct unicode_str *name,
3721 unsigned int attr, struct object *root )
3723 if (name) name->len = 0;
3724 return NULL;
3727 static struct object *socket_device_open_file( struct object *obj, unsigned int access,
3728 unsigned int sharing, unsigned int options )
3730 struct sock *sock;
3732 if (!(sock = create_socket())) return NULL;
3733 if (!(sock->fd = alloc_pseudo_fd( &sock_fd_ops, &sock->obj, options )))
3735 release_object( sock );
3736 return NULL;
3738 return &sock->obj;
3741 struct object *create_socket_device( struct object *root, const struct unicode_str *name,
3742 unsigned int attr, const struct security_descriptor *sd )
3744 return create_named_object( root, &socket_device_ops, name, attr, sd );
3747 DECL_HANDLER(recv_socket)
3749 struct sock *sock = (struct sock *)get_handle_obj( current->process, req->async.handle, 0, &sock_ops );
3750 unsigned int status = STATUS_PENDING;
3751 timeout_t timeout = 0;
3752 struct async *async;
3753 struct fd *fd;
3755 if (!sock) return;
3756 fd = sock->fd;
3758 if (!req->force_async && !sock->nonblocking && is_fd_overlapped( fd ))
3759 timeout = (timeout_t)sock->rcvtimeo * -10000;
3761 if (sock->rd_shutdown) status = STATUS_PIPE_DISCONNECTED;
3762 else if (!async_queued( &sock->read_q ))
3764 /* If read_q is not empty, we cannot really tell if the already queued
3765 * asyncs will not consume all available data; if there's no data
3766 * available, the current request won't be immediately satiable.
3768 if ((!req->force_async && sock->nonblocking) ||
3769 check_fd_events( sock->fd, req->oob && !is_oobinline( sock ) ? POLLPRI : POLLIN ))
3771 /* Give the client opportunity to complete synchronously.
3772 * If it turns out that the I/O request is not actually immediately satiable,
3773 * the client may then choose to re-queue the async (with STATUS_PENDING).
3775 * Note: If the nonblocking flag is set, we don't poll the socket
3776 * here and always opt for synchronous completion first. This is
3777 * because the application has probably seen POLLIN already from a
3778 * preceding select()/poll() call before it requested to receive
3779 * data.
3781 status = STATUS_ALERTED;
3785 if (status == STATUS_PENDING && !req->force_async && sock->nonblocking)
3786 status = STATUS_DEVICE_NOT_READY;
3788 sock->pending_events &= ~(req->oob ? AFD_POLL_OOB : AFD_POLL_READ);
3789 sock->reported_events &= ~(req->oob ? AFD_POLL_OOB : AFD_POLL_READ);
3791 if ((async = create_request_async( fd, get_fd_comp_flags( fd ), &req->async )))
3793 set_error( status );
3795 if (timeout)
3796 async_set_timeout( async, timeout, STATUS_IO_TIMEOUT );
3798 if (status == STATUS_PENDING || status == STATUS_ALERTED)
3799 queue_async( &sock->read_q, async );
3801 /* always reselect; we changed reported_events above */
3802 sock_reselect( sock );
3804 reply->wait = async_handoff( async, NULL, 0 );
3805 reply->options = get_fd_options( fd );
3806 reply->nonblocking = sock->nonblocking;
3807 release_object( async );
3809 release_object( sock );
3812 static void send_socket_completion_callback( void *private )
3814 struct send_req *send_req = private;
3815 struct iosb *iosb = send_req->iosb;
3816 struct sock *sock = send_req->sock;
3818 if (iosb->status != STATUS_SUCCESS)
3820 /* send() calls only clear and reselect events if unsuccessful. */
3821 sock->pending_events &= ~AFD_POLL_WRITE;
3822 sock->reported_events &= ~AFD_POLL_WRITE;
3823 sock_reselect( sock );
3826 release_object( iosb );
3827 release_object( sock );
3828 free( send_req );
3831 DECL_HANDLER(send_socket)
3833 struct sock *sock = (struct sock *)get_handle_obj( current->process, req->async.handle, 0, &sock_ops );
3834 unsigned int status = STATUS_PENDING;
3835 timeout_t timeout = 0;
3836 struct async *async;
3837 struct fd *fd;
3838 int bind_errno = 0;
3840 if (!sock) return;
3841 fd = sock->fd;
3843 if (sock->type == WS_SOCK_DGRAM && !sock->bound)
3845 union unix_sockaddr unix_addr;
3846 socklen_t unix_len;
3847 int unix_fd = get_unix_fd( fd );
3849 unix_len = get_unix_sockaddr_any( &unix_addr, sock->family );
3850 if (bind( unix_fd, &unix_addr.addr, unix_len ) < 0)
3851 bind_errno = errno;
3853 if (getsockname( unix_fd, &unix_addr.addr, &unix_len ) >= 0)
3855 sock->addr_len = sockaddr_from_unix( &unix_addr, &sock->addr.addr, sizeof(sock->addr) );
3856 sock->bound = 1;
3858 else if (!bind_errno) bind_errno = errno;
3861 if (!req->force_async && !sock->nonblocking && is_fd_overlapped( fd ))
3862 timeout = (timeout_t)sock->sndtimeo * -10000;
3864 if (bind_errno) status = sock_get_ntstatus( bind_errno );
3865 else if (sock->wr_shutdown) status = STATUS_PIPE_DISCONNECTED;
3866 else if (!async_queued( &sock->write_q ))
3868 /* If write_q is not empty, we cannot really tell if the already queued
3869 * asyncs will not consume all available space; if there's no space
3870 * available, the current request won't be immediately satiable.
3872 if ((!req->force_async && sock->nonblocking) || check_fd_events( sock->fd, POLLOUT ))
3874 /* Give the client opportunity to complete synchronously.
3875 * If it turns out that the I/O request is not actually immediately satiable,
3876 * the client may then choose to re-queue the async (with STATUS_PENDING).
3878 * Note: If the nonblocking flag is set, we don't poll the socket
3879 * here and always opt for synchronous completion first. This is
3880 * because the application has probably seen POLLOUT already from a
3881 * preceding select()/poll() call before it requested to send data.
3883 * Furthermore, some applications expect that any send() call on a
3884 * socket that has indicated POLLOUT beforehand never fails with
3885 * WSAEWOULDBLOCK. It's possible that Linux poll() may yield
3886 * POLLOUT on the first call but not the second, even if no send()
3887 * call has been made in the meanwhile. This can happen for a
3888 * number of reasons; for example, TCP fragmentation may consume
3889 * extra buffer space for each packet that has been split out, or
3890 * the TCP/IP networking stack may decide to shrink the send buffer
3891 * due to memory pressure.
3893 status = STATUS_ALERTED;
3897 if (status == STATUS_PENDING && !req->force_async && sock->nonblocking)
3898 status = STATUS_DEVICE_NOT_READY;
3900 if ((async = create_request_async( fd, get_fd_comp_flags( fd ), &req->async )))
3902 struct send_req *send_req;
3903 struct iosb *iosb = async_get_iosb( async );
3905 if ((send_req = mem_alloc( sizeof(*send_req) )))
3907 send_req->iosb = (struct iosb *)grab_object( iosb );
3908 send_req->sock = (struct sock *)grab_object( sock );
3909 async_set_completion_callback( async, send_socket_completion_callback, send_req );
3911 else if (status == STATUS_PENDING || status == STATUS_DEVICE_NOT_READY)
3912 status = STATUS_NO_MEMORY;
3914 release_object( iosb );
3916 set_error( status );
3918 if (timeout)
3919 async_set_timeout( async, timeout, STATUS_IO_TIMEOUT );
3921 if (status == STATUS_PENDING || status == STATUS_ALERTED)
3923 queue_async( &sock->write_q, async );
3924 sock_reselect( sock );
3927 reply->wait = async_handoff( async, NULL, 0 );
3928 reply->options = get_fd_options( fd );
3929 reply->nonblocking = sock->nonblocking;
3930 release_object( async );
3932 release_object( sock );
3935 DECL_HANDLER(socket_get_events)
3937 struct sock *sock = (struct sock *)get_handle_obj( current->process, req->handle, 0, &sock_ops );
3938 unsigned int status[13];
3939 struct event *event = NULL;
3940 unsigned int i;
3942 if (get_reply_max_size() < sizeof(status))
3944 set_error( STATUS_INVALID_PARAMETER );
3945 return;
3948 if (!sock) return;
3950 if (req->event)
3952 if (!(event = get_event_obj( current->process, req->event, EVENT_MODIFY_STATE )))
3954 release_object( sock );
3955 return;
3959 reply->flags = sock->pending_events & sock->mask;
3960 for (i = 0; i < ARRAY_SIZE( status ); ++i)
3961 status[i] = sock_get_ntstatus( sock->errors[i] );
3963 sock->pending_events &= ~sock->mask;
3964 sock_reselect( sock );
3966 if (event)
3968 reset_event( event );
3969 release_object( event );
3972 set_reply_data( status, sizeof(status) );
3974 release_object( sock );
3977 DECL_HANDLER(socket_send_icmp_id)
3979 struct sock *sock = (struct sock *)get_handle_obj( current->process, req->handle, 0, &sock_ops );
3981 if (!sock) return;
3983 if (sock->icmp_fixup_data_len == MAX_ICMP_HISTORY_LENGTH)
3985 memmove( sock->icmp_fixup_data, sock->icmp_fixup_data + 1,
3986 sizeof(*sock->icmp_fixup_data) * (MAX_ICMP_HISTORY_LENGTH - 1) );
3987 --sock->icmp_fixup_data_len;
3990 sock->icmp_fixup_data[sock->icmp_fixup_data_len].icmp_id = req->icmp_id;
3991 sock->icmp_fixup_data[sock->icmp_fixup_data_len].icmp_seq = req->icmp_seq;
3992 ++sock->icmp_fixup_data_len;
3994 release_object( sock );
3997 DECL_HANDLER(socket_get_icmp_id)
3999 struct sock *sock = (struct sock *)get_handle_obj( current->process, req->handle, 0, &sock_ops );
4000 unsigned int i;
4002 if (!sock) return;
4004 for (i = 0; i < sock->icmp_fixup_data_len; ++i)
4006 if (sock->icmp_fixup_data[i].icmp_seq == req->icmp_seq)
4008 reply->icmp_id = sock->icmp_fixup_data[i].icmp_id;
4009 --sock->icmp_fixup_data_len;
4010 memmove( &sock->icmp_fixup_data[i], &sock->icmp_fixup_data[i + 1],
4011 (sock->icmp_fixup_data_len - i) * sizeof(*sock->icmp_fixup_data) );
4012 release_object( sock );
4013 return;
4017 set_error( STATUS_NOT_FOUND );
4018 release_object( sock );