param: Rename variable used for lp_smb2_max_write ismb2_max_write
[Samba.git] / lib / tsocket / tsocket_bsd.c
blobfe39dfd5406474c6dbf1f5f12b32c1ec90d8e346
1 /*
2 Unix SMB/CIFS implementation.
4 Copyright (C) Stefan Metzmacher 2009
6 ** NOTE! The following LGPL license applies to the tsocket
7 ** library. This does NOT imply that all of Samba is released
8 ** under the LGPL
10 This library is free software; you can redistribute it and/or
11 modify it under the terms of the GNU Lesser General Public
12 License as published by the Free Software Foundation; either
13 version 3 of the License, or (at your option) any later version.
15 This library is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 Lesser General Public License for more details.
20 You should have received a copy of the GNU Lesser General Public
21 License along with this library; if not, see <http://www.gnu.org/licenses/>.
24 #include "replace.h"
25 #include "system/filesys.h"
26 #include "system/network.h"
27 #include "tsocket.h"
28 #include "tsocket_internal.h"
/*
 * Map the result of a socket related system call to an errno style code.
 *
 * ret        the return value of the system call
 * sys_errno  the value of errno right after the call
 * retry      set to true when the error is transient and the caller
 *            should simply try again later, false otherwise
 *
 * Returns 0 if the call succeeded (ret >= 0), EIO for results that do
 * not follow the "-1 plus errno" convention, and sys_errno otherwise.
 */
static int tsocket_bsd_error_from_errno(int ret,
					int sys_errno,
					bool *retry)
{
	bool transient;

	*retry = false;

	if (ret >= 0) {
		return 0;
	}

	/* a failure has to be reported as -1 together with a real errno */
	if (ret != -1 || sys_errno == 0) {
		return EIO;
	}

	transient = (sys_errno == EINTR) ||
		    (sys_errno == EINPROGRESS) ||
		    (sys_errno == EAGAIN) ||
		    /* ENOMEM is retryable on Solaris/illumos, and possibly other systems. */
		    (sys_errno == ENOMEM);

#ifdef EWOULDBLOCK
	if (sys_errno == EWOULDBLOCK) {
		transient = true;
	}
#endif

	*retry = transient;
	return sys_errno;
}
/*
 * Prepare a freshly created file descriptor for use by tsocket.
 *
 * If high_fd is true the descriptor is dup()ed until it is >= 3.
 * The descriptor is then switched to non-blocking mode and, where
 * FD_CLOEXEC is available, marked close-on-exec.
 *
 * Returns the (possibly different) descriptor on success. On failure
 * -1 is returned with errno set, and the descriptor has been closed.
 * Passing fd == -1 returns -1 without touching errno.
 */
static int tsocket_bsd_common_prepare_fd(int fd, bool high_fd)
{
	int low_fds[3];
	int num_low = 0;
	int saved_errno = 0;
	int fl;
	int idx;

	if (fd == -1) {
		return -1;
	}

	/* first make a fd >= 3 */
	if (high_fd) {
		while (fd < 3) {
			low_fds[num_low] = fd;
			num_low++;
			fd = dup(fd);
			if (fd == -1) {
				saved_errno = errno;
				break;
			}
		}

		/* the low numbered descriptors we stepped over are not needed */
		for (idx = 0; idx < num_low; idx++) {
			close(low_fds[idx]);
		}

		if (fd == -1) {
			errno = saved_errno;
			return fd;
		}
	}

	/* fd should be nonblocking. */

#ifdef O_NONBLOCK
#define FLAG_TO_SET O_NONBLOCK
#else
#ifdef SYSV
#define FLAG_TO_SET O_NDELAY
#else /* BSD */
#define FLAG_TO_SET FNDELAY
#endif
#endif

	fl = fcntl(fd, F_GETFL);
	if (fl == -1) {
		goto fail;
	}

	if (fcntl(fd, F_SETFL, fl | FLAG_TO_SET) == -1) {
		goto fail;
	}

#undef FLAG_TO_SET

	/* fd should be closed on exec() */
#ifdef FD_CLOEXEC
	fl = fcntl(fd, F_GETFD, 0);
	if (fl < 0) {
		goto fail;
	}
	if (fcntl(fd, F_SETFD, fl | FD_CLOEXEC) < 0) {
		goto fail;
	}
#endif
	return fd;

fail:
	if (fd != -1) {
		/* close() must not hide the reason we failed */
		saved_errno = errno;
		close(fd);
		errno = saved_errno;
	}
	return -1;
}
/*
 * Ask the kernel how many bytes can be read from fd right now.
 *
 * Returns the FIONREAD value when it is non-zero. When nothing is
 * pending the SO_ERROR state of the socket is inspected: a pending
 * socket error is reported as -1 with errno set to that error,
 * otherwise 0 is returned. A failing ioctl()/getsockopt() yields -1.
 */
static ssize_t tsocket_bsd_pending(int fd)
{
	int nread = 0;
	int so_error = 0;
	socklen_t so_len = sizeof(so_error);
	int rc;

	rc = ioctl(fd, FIONREAD, &nread);
	if (rc == -1) {
		return rc;
	}

	if (rc != 0) {
		/* this should not be reached */
		errno = EIO;
		return -1;
	}

	if (nread != 0) {
		return nread;
	}

	/*
	 * if no data is available check if the socket is in error state. For
	 * dgram sockets it's the way to return ICMP error messages of
	 * connected sockets to the caller.
	 */
	rc = getsockopt(fd, SOL_SOCKET, SO_ERROR, &so_error, &so_len);
	if (rc == -1) {
		return rc;
	}
	if (so_error != 0) {
		errno = so_error;
		return -1;
	}
	return 0;
}
196 static const struct tsocket_address_ops tsocket_address_bsd_ops;
/*
 * Private data of a "bsd" tsocket_address: a socket address in its
 * native sockaddr form together with the number of valid bytes.
 */
struct tsocket_address_bsd {
	/* number of bytes of 'u' that make up the address */
	socklen_t sa_socklen;
	/* the same storage viewed as each supported address family */
	union {
		struct sockaddr sa;
		struct sockaddr_in in;
#ifdef HAVE_IPV6
		struct sockaddr_in6 in6;
#endif
		struct sockaddr_un un;
		struct sockaddr_storage ss;
	} u;
};
211 int _tsocket_address_bsd_from_sockaddr(TALLOC_CTX *mem_ctx,
212 const struct sockaddr *sa,
213 size_t sa_socklen,
214 struct tsocket_address **_addr,
215 const char *location)
217 struct tsocket_address *addr;
218 struct tsocket_address_bsd *bsda;
220 if (sa_socklen < sizeof(sa->sa_family)) {
221 errno = EINVAL;
222 return -1;
225 switch (sa->sa_family) {
226 case AF_UNIX:
227 if (sa_socklen > sizeof(struct sockaddr_un)) {
228 sa_socklen = sizeof(struct sockaddr_un);
230 break;
231 case AF_INET:
232 if (sa_socklen < sizeof(struct sockaddr_in)) {
233 errno = EINVAL;
234 return -1;
236 sa_socklen = sizeof(struct sockaddr_in);
237 break;
238 #ifdef HAVE_IPV6
239 case AF_INET6:
240 if (sa_socklen < sizeof(struct sockaddr_in6)) {
241 errno = EINVAL;
242 return -1;
244 sa_socklen = sizeof(struct sockaddr_in6);
245 break;
246 #endif
247 default:
248 errno = EAFNOSUPPORT;
249 return -1;
252 if (sa_socklen > sizeof(struct sockaddr_storage)) {
253 errno = EINVAL;
254 return -1;
257 addr = tsocket_address_create(mem_ctx,
258 &tsocket_address_bsd_ops,
259 &bsda,
260 struct tsocket_address_bsd,
261 location);
262 if (!addr) {
263 errno = ENOMEM;
264 return -1;
267 ZERO_STRUCTP(bsda);
269 memcpy(&bsda->u.ss, sa, sa_socklen);
271 bsda->sa_socklen = sa_socklen;
272 #ifdef HAVE_STRUCT_SOCKADDR_SA_LEN
273 bsda->u.sa.sa_len = bsda->sa_socklen;
274 #endif
276 *_addr = addr;
277 return 0;
280 ssize_t tsocket_address_bsd_sockaddr(const struct tsocket_address *addr,
281 struct sockaddr *sa,
282 size_t sa_socklen)
284 struct tsocket_address_bsd *bsda = talloc_get_type(addr->private_data,
285 struct tsocket_address_bsd);
287 if (!bsda) {
288 errno = EINVAL;
289 return -1;
292 if (sa_socklen < bsda->sa_socklen) {
293 errno = EINVAL;
294 return -1;
297 if (sa_socklen > bsda->sa_socklen) {
298 memset(sa, 0, sa_socklen);
299 sa_socklen = bsda->sa_socklen;
302 memcpy(sa, &bsda->u.ss, sa_socklen);
303 #ifdef HAVE_STRUCT_SOCKADDR_SA_LEN
304 sa->sa_len = sa_socklen;
305 #endif
306 return sa_socklen;
309 bool tsocket_address_is_inet(const struct tsocket_address *addr, const char *fam)
311 struct tsocket_address_bsd *bsda = talloc_get_type(addr->private_data,
312 struct tsocket_address_bsd);
314 if (!bsda) {
315 return false;
318 switch (bsda->u.sa.sa_family) {
319 case AF_INET:
320 if (strcasecmp(fam, "ip") == 0) {
321 return true;
324 if (strcasecmp(fam, "ipv4") == 0) {
325 return true;
328 return false;
329 #ifdef HAVE_IPV6
330 case AF_INET6:
331 if (strcasecmp(fam, "ip") == 0) {
332 return true;
335 if (strcasecmp(fam, "ipv6") == 0) {
336 return true;
339 return false;
340 #endif
343 return false;
346 int _tsocket_address_inet_from_strings(TALLOC_CTX *mem_ctx,
347 const char *fam,
348 const char *addr,
349 uint16_t port,
350 struct tsocket_address **_addr,
351 const char *location)
353 struct addrinfo hints;
354 struct addrinfo *result = NULL;
355 char port_str[6];
356 int ret;
358 ZERO_STRUCT(hints);
360 * we use SOCKET_STREAM here to get just one result
361 * back from getaddrinfo().
363 hints.ai_socktype = SOCK_STREAM;
364 hints.ai_flags = AI_NUMERICHOST | AI_NUMERICSERV;
366 if (strcasecmp(fam, "ip") == 0) {
367 hints.ai_family = AF_UNSPEC;
368 if (!addr) {
369 #ifdef HAVE_IPV6
370 addr = "::";
371 #else
372 addr = "0.0.0.0";
373 #endif
375 } else if (strcasecmp(fam, "ipv4") == 0) {
376 hints.ai_family = AF_INET;
377 if (!addr) {
378 addr = "0.0.0.0";
380 #ifdef HAVE_IPV6
381 } else if (strcasecmp(fam, "ipv6") == 0) {
382 hints.ai_family = AF_INET6;
383 if (!addr) {
384 addr = "::";
386 #endif
387 } else {
388 errno = EAFNOSUPPORT;
389 return -1;
392 snprintf(port_str, sizeof(port_str), "%u", port);
394 ret = getaddrinfo(addr, port_str, &hints, &result);
395 if (ret != 0) {
396 switch (ret) {
397 case EAI_FAIL:
398 errno = EINVAL;
399 break;
401 ret = -1;
402 goto done;
405 if (result->ai_socktype != SOCK_STREAM) {
406 errno = EINVAL;
407 ret = -1;
408 goto done;
411 ret = _tsocket_address_bsd_from_sockaddr(mem_ctx,
412 result->ai_addr,
413 result->ai_addrlen,
414 _addr,
415 location);
417 done:
418 if (result) {
419 freeaddrinfo(result);
421 return ret;
424 char *tsocket_address_inet_addr_string(const struct tsocket_address *addr,
425 TALLOC_CTX *mem_ctx)
427 struct tsocket_address_bsd *bsda = talloc_get_type(addr->private_data,
428 struct tsocket_address_bsd);
429 char addr_str[INET6_ADDRSTRLEN+1];
430 const char *str;
432 if (!bsda) {
433 errno = EINVAL;
434 return NULL;
437 switch (bsda->u.sa.sa_family) {
438 case AF_INET:
439 str = inet_ntop(bsda->u.in.sin_family,
440 &bsda->u.in.sin_addr,
441 addr_str, sizeof(addr_str));
442 break;
443 #ifdef HAVE_IPV6
444 case AF_INET6:
445 str = inet_ntop(bsda->u.in6.sin6_family,
446 &bsda->u.in6.sin6_addr,
447 addr_str, sizeof(addr_str));
448 break;
449 #endif
450 default:
451 errno = EINVAL;
452 return NULL;
455 if (!str) {
456 return NULL;
459 return talloc_strdup(mem_ctx, str);
462 uint16_t tsocket_address_inet_port(const struct tsocket_address *addr)
464 struct tsocket_address_bsd *bsda = talloc_get_type(addr->private_data,
465 struct tsocket_address_bsd);
466 uint16_t port = 0;
468 if (!bsda) {
469 errno = EINVAL;
470 return 0;
473 switch (bsda->u.sa.sa_family) {
474 case AF_INET:
475 port = ntohs(bsda->u.in.sin_port);
476 break;
477 #ifdef HAVE_IPV6
478 case AF_INET6:
479 port = ntohs(bsda->u.in6.sin6_port);
480 break;
481 #endif
482 default:
483 errno = EINVAL;
484 return 0;
487 return port;
490 int tsocket_address_inet_set_port(struct tsocket_address *addr,
491 uint16_t port)
493 struct tsocket_address_bsd *bsda = talloc_get_type(addr->private_data,
494 struct tsocket_address_bsd);
496 if (!bsda) {
497 errno = EINVAL;
498 return -1;
501 switch (bsda->u.sa.sa_family) {
502 case AF_INET:
503 bsda->u.in.sin_port = htons(port);
504 break;
505 #ifdef HAVE_IPV6
506 case AF_INET6:
507 bsda->u.in6.sin6_port = htons(port);
508 break;
509 #endif
510 default:
511 errno = EINVAL;
512 return -1;
515 return 0;
518 bool tsocket_address_is_unix(const struct tsocket_address *addr)
520 struct tsocket_address_bsd *bsda = talloc_get_type(addr->private_data,
521 struct tsocket_address_bsd);
523 if (!bsda) {
524 return false;
527 switch (bsda->u.sa.sa_family) {
528 case AF_UNIX:
529 return true;
532 return false;
535 int _tsocket_address_unix_from_path(TALLOC_CTX *mem_ctx,
536 const char *path,
537 struct tsocket_address **_addr,
538 const char *location)
540 struct sockaddr_un un;
541 void *p = &un;
542 int ret;
544 if (!path) {
545 path = "";
548 if (strlen(path) > sizeof(un.sun_path)-1) {
549 errno = ENAMETOOLONG;
550 return -1;
553 ZERO_STRUCT(un);
554 un.sun_family = AF_UNIX;
555 strncpy(un.sun_path, path, sizeof(un.sun_path)-1);
557 ret = _tsocket_address_bsd_from_sockaddr(mem_ctx,
558 (struct sockaddr *)p,
559 sizeof(un),
560 _addr,
561 location);
563 return ret;
566 char *tsocket_address_unix_path(const struct tsocket_address *addr,
567 TALLOC_CTX *mem_ctx)
569 struct tsocket_address_bsd *bsda = talloc_get_type(addr->private_data,
570 struct tsocket_address_bsd);
571 const char *str;
573 if (!bsda) {
574 errno = EINVAL;
575 return NULL;
578 switch (bsda->u.sa.sa_family) {
579 case AF_UNIX:
580 str = bsda->u.un.sun_path;
581 break;
582 default:
583 errno = EINVAL;
584 return NULL;
587 return talloc_strdup(mem_ctx, str);
590 static char *tsocket_address_bsd_string(const struct tsocket_address *addr,
591 TALLOC_CTX *mem_ctx)
593 struct tsocket_address_bsd *bsda = talloc_get_type(addr->private_data,
594 struct tsocket_address_bsd);
595 char *str;
596 char *addr_str;
597 const char *prefix = NULL;
598 uint16_t port;
600 switch (bsda->u.sa.sa_family) {
601 case AF_UNIX:
602 return talloc_asprintf(mem_ctx, "unix:%s",
603 bsda->u.un.sun_path);
604 case AF_INET:
605 prefix = "ipv4";
606 break;
607 #ifdef HAVE_IPV6
608 case AF_INET6:
609 prefix = "ipv6";
610 break;
611 #endif
612 default:
613 errno = EINVAL;
614 return NULL;
617 addr_str = tsocket_address_inet_addr_string(addr, mem_ctx);
618 if (!addr_str) {
619 return NULL;
622 port = tsocket_address_inet_port(addr);
624 str = talloc_asprintf(mem_ctx, "%s:%s:%u",
625 prefix, addr_str, port);
626 talloc_free(addr_str);
628 return str;
631 static struct tsocket_address *tsocket_address_bsd_copy(const struct tsocket_address *addr,
632 TALLOC_CTX *mem_ctx,
633 const char *location)
635 struct tsocket_address_bsd *bsda = talloc_get_type(addr->private_data,
636 struct tsocket_address_bsd);
637 struct tsocket_address *copy;
638 int ret;
640 ret = _tsocket_address_bsd_from_sockaddr(mem_ctx,
641 &bsda->u.sa,
642 bsda->sa_socklen,
643 &copy,
644 location);
645 if (ret != 0) {
646 return NULL;
649 return copy;
652 static const struct tsocket_address_ops tsocket_address_bsd_ops = {
653 .name = "bsd",
654 .string = tsocket_address_bsd_string,
655 .copy = tsocket_address_bsd_copy,
/*
 * Private state of a bsd datagram socket.
 */
struct tdgram_bsd {
	/* the socket, -1 once it has been closed */
	int fd;

	/* the tevent context 'fde' was registered on, only used for comparison */
	void *event_ptr;
	/* the fd event used for both read and write notification */
	struct tevent_fd *fde;
	/* try recvfrom() right away instead of waiting for readability */
	bool optimize_recvfrom;

	/* callbacks invoked from the fd event handler, and their arguments */
	void *readable_private;
	void (*readable_handler)(void *private_data);
	void *writeable_private;
	void (*writeable_handler)(void *private_data);
};
671 bool tdgram_bsd_optimize_recvfrom(struct tdgram_context *dgram,
672 bool on)
674 struct tdgram_bsd *bsds =
675 talloc_get_type(_tdgram_context_data(dgram),
676 struct tdgram_bsd);
677 bool old;
679 if (bsds == NULL) {
680 /* not a bsd socket */
681 return false;
684 old = bsds->optimize_recvfrom;
685 bsds->optimize_recvfrom = on;
687 return old;
690 static void tdgram_bsd_fde_handler(struct tevent_context *ev,
691 struct tevent_fd *fde,
692 uint16_t flags,
693 void *private_data)
695 struct tdgram_bsd *bsds = talloc_get_type_abort(private_data,
696 struct tdgram_bsd);
698 if (flags & TEVENT_FD_WRITE) {
699 bsds->writeable_handler(bsds->writeable_private);
700 return;
702 if (flags & TEVENT_FD_READ) {
703 if (!bsds->readable_handler) {
704 TEVENT_FD_NOT_READABLE(bsds->fde);
705 return;
707 bsds->readable_handler(bsds->readable_private);
708 return;
712 static int tdgram_bsd_set_readable_handler(struct tdgram_bsd *bsds,
713 struct tevent_context *ev,
714 void (*handler)(void *private_data),
715 void *private_data)
717 if (ev == NULL) {
718 if (handler) {
719 errno = EINVAL;
720 return -1;
722 if (!bsds->readable_handler) {
723 return 0;
725 bsds->readable_handler = NULL;
726 bsds->readable_private = NULL;
728 return 0;
731 /* read and write must use the same tevent_context */
732 if (bsds->event_ptr != ev) {
733 if (bsds->readable_handler || bsds->writeable_handler) {
734 errno = EINVAL;
735 return -1;
737 bsds->event_ptr = NULL;
738 TALLOC_FREE(bsds->fde);
741 if (tevent_fd_get_flags(bsds->fde) == 0) {
742 TALLOC_FREE(bsds->fde);
744 bsds->fde = tevent_add_fd(ev, bsds,
745 bsds->fd, TEVENT_FD_READ,
746 tdgram_bsd_fde_handler,
747 bsds);
748 if (!bsds->fde) {
749 errno = ENOMEM;
750 return -1;
753 /* cache the event context we're running on */
754 bsds->event_ptr = ev;
755 } else if (!bsds->readable_handler) {
756 TEVENT_FD_READABLE(bsds->fde);
759 bsds->readable_handler = handler;
760 bsds->readable_private = private_data;
762 return 0;
765 static int tdgram_bsd_set_writeable_handler(struct tdgram_bsd *bsds,
766 struct tevent_context *ev,
767 void (*handler)(void *private_data),
768 void *private_data)
770 if (ev == NULL) {
771 if (handler) {
772 errno = EINVAL;
773 return -1;
775 if (!bsds->writeable_handler) {
776 return 0;
778 bsds->writeable_handler = NULL;
779 bsds->writeable_private = NULL;
780 TEVENT_FD_NOT_WRITEABLE(bsds->fde);
782 return 0;
785 /* read and write must use the same tevent_context */
786 if (bsds->event_ptr != ev) {
787 if (bsds->readable_handler || bsds->writeable_handler) {
788 errno = EINVAL;
789 return -1;
791 bsds->event_ptr = NULL;
792 TALLOC_FREE(bsds->fde);
795 if (tevent_fd_get_flags(bsds->fde) == 0) {
796 TALLOC_FREE(bsds->fde);
798 bsds->fde = tevent_add_fd(ev, bsds,
799 bsds->fd, TEVENT_FD_WRITE,
800 tdgram_bsd_fde_handler,
801 bsds);
802 if (!bsds->fde) {
803 errno = ENOMEM;
804 return -1;
807 /* cache the event context we're running on */
808 bsds->event_ptr = ev;
809 } else if (!bsds->writeable_handler) {
810 TEVENT_FD_WRITEABLE(bsds->fde);
813 bsds->writeable_handler = handler;
814 bsds->writeable_private = private_data;
816 return 0;
/*
 * Per request state of a recvfrom operation on a bsd datagram socket.
 */
struct tdgram_bsd_recvfrom_state {
	/* the socket the request operates on */
	struct tdgram_context *dgram;
	/* true until the handler has run for the first time */
	bool first_try;
	/* the received datagram and its length in bytes */
	uint8_t *buf;
	size_t len;
	/* the sender of the datagram */
	struct tsocket_address *src;
};
827 static int tdgram_bsd_recvfrom_destructor(struct tdgram_bsd_recvfrom_state *state)
829 struct tdgram_bsd *bsds = tdgram_context_data(state->dgram,
830 struct tdgram_bsd);
832 tdgram_bsd_set_readable_handler(bsds, NULL, NULL, NULL);
834 return 0;
837 static void tdgram_bsd_recvfrom_handler(void *private_data);
839 static struct tevent_req *tdgram_bsd_recvfrom_send(TALLOC_CTX *mem_ctx,
840 struct tevent_context *ev,
841 struct tdgram_context *dgram)
843 struct tevent_req *req;
844 struct tdgram_bsd_recvfrom_state *state;
845 struct tdgram_bsd *bsds = tdgram_context_data(dgram, struct tdgram_bsd);
846 int ret;
848 req = tevent_req_create(mem_ctx, &state,
849 struct tdgram_bsd_recvfrom_state);
850 if (!req) {
851 return NULL;
854 state->dgram = dgram;
855 state->first_try= true;
856 state->buf = NULL;
857 state->len = 0;
858 state->src = NULL;
860 talloc_set_destructor(state, tdgram_bsd_recvfrom_destructor);
862 if (bsds->fd == -1) {
863 tevent_req_error(req, ENOTCONN);
864 goto post;
869 * this is a fast path, not waiting for the
870 * socket to become explicit readable gains
871 * about 10%-20% performance in benchmark tests.
873 if (bsds->optimize_recvfrom) {
875 * We only do the optimization on
876 * recvfrom if the caller asked for it.
878 * This is needed because in most cases
879 * we preferr to flush send buffers before
880 * receiving incoming requests.
882 tdgram_bsd_recvfrom_handler(req);
883 if (!tevent_req_is_in_progress(req)) {
884 goto post;
888 ret = tdgram_bsd_set_readable_handler(bsds, ev,
889 tdgram_bsd_recvfrom_handler,
890 req);
891 if (ret == -1) {
892 tevent_req_error(req, errno);
893 goto post;
896 return req;
898 post:
899 tevent_req_post(req, ev);
900 return req;
903 static void tdgram_bsd_recvfrom_handler(void *private_data)
905 struct tevent_req *req = talloc_get_type_abort(private_data,
906 struct tevent_req);
907 struct tdgram_bsd_recvfrom_state *state = tevent_req_data(req,
908 struct tdgram_bsd_recvfrom_state);
909 struct tdgram_context *dgram = state->dgram;
910 struct tdgram_bsd *bsds = tdgram_context_data(dgram, struct tdgram_bsd);
911 struct tsocket_address_bsd *bsda;
912 ssize_t ret;
913 int err;
914 bool retry;
916 ret = tsocket_bsd_pending(bsds->fd);
917 if (state->first_try && ret == 0) {
918 state->first_try = false;
919 /* retry later */
920 return;
922 state->first_try = false;
924 err = tsocket_bsd_error_from_errno(ret, errno, &retry);
925 if (retry) {
926 /* retry later */
927 return;
929 if (tevent_req_error(req, err)) {
930 return;
933 /* note that 'ret' can be 0 here */
934 state->buf = talloc_array(state, uint8_t, ret);
935 if (tevent_req_nomem(state->buf, req)) {
936 return;
938 state->len = ret;
940 state->src = tsocket_address_create(state,
941 &tsocket_address_bsd_ops,
942 &bsda,
943 struct tsocket_address_bsd,
944 __location__ "bsd_recvfrom");
945 if (tevent_req_nomem(state->src, req)) {
946 return;
949 ZERO_STRUCTP(bsda);
950 bsda->sa_socklen = sizeof(bsda->u.ss);
951 #ifdef HAVE_STRUCT_SOCKADDR_SA_LEN
952 bsda->u.sa.sa_len = bsda->sa_socklen;
953 #endif
955 ret = recvfrom(bsds->fd, state->buf, state->len, 0,
956 &bsda->u.sa, &bsda->sa_socklen);
957 err = tsocket_bsd_error_from_errno(ret, errno, &retry);
958 if (retry) {
959 /* retry later */
960 return;
962 if (tevent_req_error(req, err)) {
963 return;
967 * Some systems (FreeBSD, see bug #7115) return too much
968 * bytes in tsocket_bsd_pending()/ioctl(fd, FIONREAD, ...),
969 * the return value includes some IP/UDP header bytes,
970 * while recvfrom() just returns the payload.
972 state->buf = talloc_realloc(state, state->buf, uint8_t, ret);
973 if (tevent_req_nomem(state->buf, req)) {
974 return;
976 state->len = ret;
978 tevent_req_done(req);
981 static ssize_t tdgram_bsd_recvfrom_recv(struct tevent_req *req,
982 int *perrno,
983 TALLOC_CTX *mem_ctx,
984 uint8_t **buf,
985 struct tsocket_address **src)
987 struct tdgram_bsd_recvfrom_state *state = tevent_req_data(req,
988 struct tdgram_bsd_recvfrom_state);
989 ssize_t ret;
991 ret = tsocket_simple_int_recv(req, perrno);
992 if (ret == 0) {
993 *buf = talloc_move(mem_ctx, &state->buf);
994 ret = state->len;
995 if (src) {
996 *src = talloc_move(mem_ctx, &state->src);
1000 tevent_req_received(req);
1001 return ret;
/*
 * Per request state of a sendto operation on a bsd datagram socket.
 */
struct tdgram_bsd_sendto_state {
	/* the socket the request operates on */
	struct tdgram_context *dgram;

	/* the datagram to send; the pointer is stored, the data is not copied */
	const uint8_t *buf;
	size_t len;
	/* destination address, may be NULL */
	const struct tsocket_address *dst;

	/* return value of the final sendto() call */
	ssize_t ret;
};
1014 static int tdgram_bsd_sendto_destructor(struct tdgram_bsd_sendto_state *state)
1016 struct tdgram_bsd *bsds = tdgram_context_data(state->dgram,
1017 struct tdgram_bsd);
1019 tdgram_bsd_set_writeable_handler(bsds, NULL, NULL, NULL);
1021 return 0;
1024 static void tdgram_bsd_sendto_handler(void *private_data);
1026 static struct tevent_req *tdgram_bsd_sendto_send(TALLOC_CTX *mem_ctx,
1027 struct tevent_context *ev,
1028 struct tdgram_context *dgram,
1029 const uint8_t *buf,
1030 size_t len,
1031 const struct tsocket_address *dst)
1033 struct tevent_req *req;
1034 struct tdgram_bsd_sendto_state *state;
1035 struct tdgram_bsd *bsds = tdgram_context_data(dgram, struct tdgram_bsd);
1036 int ret;
1038 req = tevent_req_create(mem_ctx, &state,
1039 struct tdgram_bsd_sendto_state);
1040 if (!req) {
1041 return NULL;
1044 state->dgram = dgram;
1045 state->buf = buf;
1046 state->len = len;
1047 state->dst = dst;
1048 state->ret = -1;
1050 talloc_set_destructor(state, tdgram_bsd_sendto_destructor);
1052 if (bsds->fd == -1) {
1053 tevent_req_error(req, ENOTCONN);
1054 goto post;
1058 * this is a fast path, not waiting for the
1059 * socket to become explicit writeable gains
1060 * about 10%-20% performance in benchmark tests.
1062 tdgram_bsd_sendto_handler(req);
1063 if (!tevent_req_is_in_progress(req)) {
1064 goto post;
1067 ret = tdgram_bsd_set_writeable_handler(bsds, ev,
1068 tdgram_bsd_sendto_handler,
1069 req);
1070 if (ret == -1) {
1071 tevent_req_error(req, errno);
1072 goto post;
1075 return req;
1077 post:
1078 tevent_req_post(req, ev);
1079 return req;
1082 static void tdgram_bsd_sendto_handler(void *private_data)
1084 struct tevent_req *req = talloc_get_type_abort(private_data,
1085 struct tevent_req);
1086 struct tdgram_bsd_sendto_state *state = tevent_req_data(req,
1087 struct tdgram_bsd_sendto_state);
1088 struct tdgram_context *dgram = state->dgram;
1089 struct tdgram_bsd *bsds = tdgram_context_data(dgram, struct tdgram_bsd);
1090 struct sockaddr *sa = NULL;
1091 socklen_t sa_socklen = 0;
1092 ssize_t ret;
1093 int err;
1094 bool retry;
1096 if (state->dst) {
1097 struct tsocket_address_bsd *bsda =
1098 talloc_get_type(state->dst->private_data,
1099 struct tsocket_address_bsd);
1101 sa = &bsda->u.sa;
1102 sa_socklen = bsda->sa_socklen;
1105 ret = sendto(bsds->fd, state->buf, state->len, 0, sa, sa_socklen);
1106 err = tsocket_bsd_error_from_errno(ret, errno, &retry);
1107 if (retry) {
1108 /* retry later */
1109 return;
1112 if (err == EMSGSIZE) {
1113 /* round up in 1K increments */
1114 int bufsize = ((state->len + 1023) & (~1023));
1116 ret = setsockopt(bsds->fd, SOL_SOCKET, SO_SNDBUF, &bufsize,
1117 sizeof(bufsize));
1118 if (ret == 0) {
1120 * We do the rety here, rather then via the
1121 * handler, as we only want to retry once for
1122 * this condition, so if there is a mismatch
1123 * between what setsockopt() accepts and what can
1124 * actually be sent, we do not end up in a
1125 * loop.
1128 ret = sendto(bsds->fd, state->buf, state->len,
1129 0, sa, sa_socklen);
1130 err = tsocket_bsd_error_from_errno(ret, errno, &retry);
1131 if (retry) { /* retry later */
1132 return;
1137 if (tevent_req_error(req, err)) {
1138 return;
1141 state->ret = ret;
1143 tevent_req_done(req);
1146 static ssize_t tdgram_bsd_sendto_recv(struct tevent_req *req, int *perrno)
1148 struct tdgram_bsd_sendto_state *state = tevent_req_data(req,
1149 struct tdgram_bsd_sendto_state);
1150 ssize_t ret;
1152 ret = tsocket_simple_int_recv(req, perrno);
1153 if (ret == 0) {
1154 ret = state->ret;
1157 tevent_req_received(req);
1158 return ret;
/*
 * State of a disconnect request. Nothing needs to be remembered, the
 * single member is only a placeholder.
 */
struct tdgram_bsd_disconnect_state {
	uint8_t __dummy;
};
1165 static struct tevent_req *tdgram_bsd_disconnect_send(TALLOC_CTX *mem_ctx,
1166 struct tevent_context *ev,
1167 struct tdgram_context *dgram)
1169 struct tdgram_bsd *bsds = tdgram_context_data(dgram, struct tdgram_bsd);
1170 struct tevent_req *req;
1171 struct tdgram_bsd_disconnect_state *state;
1172 int ret;
1173 int err;
1174 bool dummy;
1176 req = tevent_req_create(mem_ctx, &state,
1177 struct tdgram_bsd_disconnect_state);
1178 if (req == NULL) {
1179 return NULL;
1182 if (bsds->fd == -1) {
1183 tevent_req_error(req, ENOTCONN);
1184 goto post;
1187 TALLOC_FREE(bsds->fde);
1188 ret = close(bsds->fd);
1189 bsds->fd = -1;
1190 err = tsocket_bsd_error_from_errno(ret, errno, &dummy);
1191 if (tevent_req_error(req, err)) {
1192 goto post;
1195 tevent_req_done(req);
1196 post:
1197 tevent_req_post(req, ev);
1198 return req;
/*
 * Collect the result of a disconnect request.
 *
 * Returns whatever tsocket_simple_int_recv() reports for the request,
 * which also fills *perrno.
 */
static int tdgram_bsd_disconnect_recv(struct tevent_req *req,
				      int *perrno)
{
	int result = tsocket_simple_int_recv(req, perrno);

	tevent_req_received(req);
	return result;
}
1212 static const struct tdgram_context_ops tdgram_bsd_ops = {
1213 .name = "bsd",
1215 .recvfrom_send = tdgram_bsd_recvfrom_send,
1216 .recvfrom_recv = tdgram_bsd_recvfrom_recv,
1218 .sendto_send = tdgram_bsd_sendto_send,
1219 .sendto_recv = tdgram_bsd_sendto_recv,
1221 .disconnect_send = tdgram_bsd_disconnect_send,
1222 .disconnect_recv = tdgram_bsd_disconnect_recv,
1225 static int tdgram_bsd_destructor(struct tdgram_bsd *bsds)
1227 TALLOC_FREE(bsds->fde);
1228 if (bsds->fd != -1) {
1229 close(bsds->fd);
1230 bsds->fd = -1;
1232 return 0;
1235 static int tdgram_bsd_dgram_socket(const struct tsocket_address *local,
1236 const struct tsocket_address *remote,
1237 bool broadcast,
1238 TALLOC_CTX *mem_ctx,
1239 struct tdgram_context **_dgram,
1240 const char *location)
1242 struct tsocket_address_bsd *lbsda =
1243 talloc_get_type_abort(local->private_data,
1244 struct tsocket_address_bsd);
1245 struct tsocket_address_bsd *rbsda = NULL;
1246 struct tdgram_context *dgram;
1247 struct tdgram_bsd *bsds;
1248 int fd;
1249 int ret;
1250 bool do_bind = false;
1251 bool do_reuseaddr = false;
1252 bool do_ipv6only = false;
1253 bool is_inet = false;
1254 int sa_fam = lbsda->u.sa.sa_family;
1256 if (remote) {
1257 rbsda = talloc_get_type_abort(remote->private_data,
1258 struct tsocket_address_bsd);
1261 switch (lbsda->u.sa.sa_family) {
1262 case AF_UNIX:
1263 if (broadcast) {
1264 errno = EINVAL;
1265 return -1;
1267 if (lbsda->u.un.sun_path[0] != 0) {
1268 do_reuseaddr = true;
1269 do_bind = true;
1271 break;
1272 case AF_INET:
1273 if (lbsda->u.in.sin_port != 0) {
1274 do_reuseaddr = true;
1275 do_bind = true;
1277 if (lbsda->u.in.sin_addr.s_addr != INADDR_ANY) {
1278 do_bind = true;
1280 is_inet = true;
1281 break;
1282 #ifdef HAVE_IPV6
1283 case AF_INET6:
1284 if (lbsda->u.in6.sin6_port != 0) {
1285 do_reuseaddr = true;
1286 do_bind = true;
1288 if (memcmp(&in6addr_any,
1289 &lbsda->u.in6.sin6_addr,
1290 sizeof(in6addr_any)) != 0) {
1291 do_bind = true;
1293 is_inet = true;
1294 do_ipv6only = true;
1295 break;
1296 #endif
1297 default:
1298 errno = EINVAL;
1299 return -1;
1302 if (!do_bind && is_inet && rbsda) {
1303 sa_fam = rbsda->u.sa.sa_family;
1304 switch (sa_fam) {
1305 case AF_INET:
1306 do_ipv6only = false;
1307 break;
1308 #ifdef HAVE_IPV6
1309 case AF_INET6:
1310 do_ipv6only = true;
1311 break;
1312 #endif
1316 fd = socket(sa_fam, SOCK_DGRAM, 0);
1317 if (fd < 0) {
1318 return -1;
1321 fd = tsocket_bsd_common_prepare_fd(fd, true);
1322 if (fd < 0) {
1323 return -1;
1326 dgram = tdgram_context_create(mem_ctx,
1327 &tdgram_bsd_ops,
1328 &bsds,
1329 struct tdgram_bsd,
1330 location);
1331 if (!dgram) {
1332 int saved_errno = errno;
1333 close(fd);
1334 errno = saved_errno;
1335 return -1;
1337 ZERO_STRUCTP(bsds);
1338 bsds->fd = fd;
1339 talloc_set_destructor(bsds, tdgram_bsd_destructor);
1341 #ifdef HAVE_IPV6
1342 if (do_ipv6only) {
1343 int val = 1;
1345 ret = setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY,
1346 (const void *)&val, sizeof(val));
1347 if (ret == -1) {
1348 int saved_errno = errno;
1349 talloc_free(dgram);
1350 errno = saved_errno;
1351 return -1;
1354 #endif
1356 if (broadcast) {
1357 int val = 1;
1359 ret = setsockopt(fd, SOL_SOCKET, SO_BROADCAST,
1360 (const void *)&val, sizeof(val));
1361 if (ret == -1) {
1362 int saved_errno = errno;
1363 talloc_free(dgram);
1364 errno = saved_errno;
1365 return -1;
1369 if (do_reuseaddr) {
1370 int val = 1;
1372 ret = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR,
1373 (const void *)&val, sizeof(val));
1374 if (ret == -1) {
1375 int saved_errno = errno;
1376 talloc_free(dgram);
1377 errno = saved_errno;
1378 return -1;
1382 if (do_bind) {
1383 ret = bind(fd, &lbsda->u.sa, lbsda->sa_socklen);
1384 if (ret == -1) {
1385 int saved_errno = errno;
1386 talloc_free(dgram);
1387 errno = saved_errno;
1388 return -1;
1392 if (rbsda) {
1393 if (rbsda->u.sa.sa_family != sa_fam) {
1394 talloc_free(dgram);
1395 errno = EINVAL;
1396 return -1;
1399 ret = connect(fd, &rbsda->u.sa, rbsda->sa_socklen);
1400 if (ret == -1) {
1401 int saved_errno = errno;
1402 talloc_free(dgram);
1403 errno = saved_errno;
1404 return -1;
1408 *_dgram = dgram;
1409 return 0;
1412 int _tdgram_inet_udp_socket(const struct tsocket_address *local,
1413 const struct tsocket_address *remote,
1414 TALLOC_CTX *mem_ctx,
1415 struct tdgram_context **dgram,
1416 const char *location)
1418 struct tsocket_address_bsd *lbsda =
1419 talloc_get_type_abort(local->private_data,
1420 struct tsocket_address_bsd);
1421 int ret;
1423 switch (lbsda->u.sa.sa_family) {
1424 case AF_INET:
1425 break;
1426 #ifdef HAVE_IPV6
1427 case AF_INET6:
1428 break;
1429 #endif
1430 default:
1431 errno = EINVAL;
1432 return -1;
1435 ret = tdgram_bsd_dgram_socket(local, remote, false,
1436 mem_ctx, dgram, location);
1438 return ret;
1441 int _tdgram_unix_socket(const struct tsocket_address *local,
1442 const struct tsocket_address *remote,
1443 TALLOC_CTX *mem_ctx,
1444 struct tdgram_context **dgram,
1445 const char *location)
1447 struct tsocket_address_bsd *lbsda =
1448 talloc_get_type_abort(local->private_data,
1449 struct tsocket_address_bsd);
1450 int ret;
1452 switch (lbsda->u.sa.sa_family) {
1453 case AF_UNIX:
1454 break;
1455 default:
1456 errno = EINVAL;
1457 return -1;
1460 ret = tdgram_bsd_dgram_socket(local, remote, false,
1461 mem_ctx, dgram, location);
1463 return ret;
/*
 * Private state of a bsd stream socket.
 */
struct tstream_bsd {
	/* the socket */
	int fd;

	/* the tevent context 'fde' was registered on, only used for comparison */
	void *event_ptr;
	/* the fd event used for both read and write notification */
	struct tevent_fd *fde;
	/* flag toggled by tstream_bsd_optimize_readv() */
	bool optimize_readv;

	/* callbacks invoked from the fd event handler, and their arguments */
	void *readable_private;
	void (*readable_handler)(void *private_data);
	void *writeable_private;
	void (*writeable_handler)(void *private_data);
};
1479 bool tstream_bsd_optimize_readv(struct tstream_context *stream,
1480 bool on)
1482 struct tstream_bsd *bsds =
1483 talloc_get_type(_tstream_context_data(stream),
1484 struct tstream_bsd);
1485 bool old;
1487 if (bsds == NULL) {
1488 /* not a bsd socket */
1489 return false;
1492 old = bsds->optimize_readv;
1493 bsds->optimize_readv = on;
1495 return old;
1498 static void tstream_bsd_fde_handler(struct tevent_context *ev,
1499 struct tevent_fd *fde,
1500 uint16_t flags,
1501 void *private_data)
1503 struct tstream_bsd *bsds = talloc_get_type_abort(private_data,
1504 struct tstream_bsd);
1506 if (flags & TEVENT_FD_WRITE) {
1507 bsds->writeable_handler(bsds->writeable_private);
1508 return;
1510 if (flags & TEVENT_FD_READ) {
1511 if (!bsds->readable_handler) {
1512 if (bsds->writeable_handler) {
1513 bsds->writeable_handler(bsds->writeable_private);
1514 return;
1516 TEVENT_FD_NOT_READABLE(bsds->fde);
1517 return;
1519 bsds->readable_handler(bsds->readable_private);
1520 return;
1524 static int tstream_bsd_set_readable_handler(struct tstream_bsd *bsds,
1525 struct tevent_context *ev,
1526 void (*handler)(void *private_data),
1527 void *private_data)
1529 if (ev == NULL) {
1530 if (handler) {
1531 errno = EINVAL;
1532 return -1;
1534 if (!bsds->readable_handler) {
1535 return 0;
1537 bsds->readable_handler = NULL;
1538 bsds->readable_private = NULL;
1540 return 0;
1543 /* read and write must use the same tevent_context */
1544 if (bsds->event_ptr != ev) {
1545 if (bsds->readable_handler || bsds->writeable_handler) {
1546 errno = EINVAL;
1547 return -1;
1549 bsds->event_ptr = NULL;
1550 TALLOC_FREE(bsds->fde);
1553 if (tevent_fd_get_flags(bsds->fde) == 0) {
1554 TALLOC_FREE(bsds->fde);
1556 bsds->fde = tevent_add_fd(ev, bsds,
1557 bsds->fd, TEVENT_FD_READ,
1558 tstream_bsd_fde_handler,
1559 bsds);
1560 if (!bsds->fde) {
1561 errno = ENOMEM;
1562 return -1;
1565 /* cache the event context we're running on */
1566 bsds->event_ptr = ev;
1567 } else if (!bsds->readable_handler) {
1568 TEVENT_FD_READABLE(bsds->fde);
1571 bsds->readable_handler = handler;
1572 bsds->readable_private = private_data;
1574 return 0;
1577 static int tstream_bsd_set_writeable_handler(struct tstream_bsd *bsds,
1578 struct tevent_context *ev,
1579 void (*handler)(void *private_data),
1580 void *private_data)
1582 if (ev == NULL) {
1583 if (handler) {
1584 errno = EINVAL;
1585 return -1;
1587 if (!bsds->writeable_handler) {
1588 return 0;
1590 bsds->writeable_handler = NULL;
1591 bsds->writeable_private = NULL;
1592 TEVENT_FD_NOT_WRITEABLE(bsds->fde);
1594 return 0;
1597 /* read and write must use the same tevent_context */
1598 if (bsds->event_ptr != ev) {
1599 if (bsds->readable_handler || bsds->writeable_handler) {
1600 errno = EINVAL;
1601 return -1;
1603 bsds->event_ptr = NULL;
1604 TALLOC_FREE(bsds->fde);
1607 if (tevent_fd_get_flags(bsds->fde) == 0) {
1608 TALLOC_FREE(bsds->fde);
1610 bsds->fde = tevent_add_fd(ev, bsds,
1611 bsds->fd,
1612 TEVENT_FD_READ | TEVENT_FD_WRITE,
1613 tstream_bsd_fde_handler,
1614 bsds);
1615 if (!bsds->fde) {
1616 errno = ENOMEM;
1617 return -1;
1620 /* cache the event context we're running on */
1621 bsds->event_ptr = ev;
1622 } else if (!bsds->writeable_handler) {
1623 uint16_t flags = tevent_fd_get_flags(bsds->fde);
1624 flags |= TEVENT_FD_READ | TEVENT_FD_WRITE;
1625 tevent_fd_set_flags(bsds->fde, flags);
1628 bsds->writeable_handler = handler;
1629 bsds->writeable_private = private_data;
1631 return 0;
1634 static ssize_t tstream_bsd_pending_bytes(struct tstream_context *stream)
1636 struct tstream_bsd *bsds = tstream_context_data(stream,
1637 struct tstream_bsd);
1638 ssize_t ret;
1640 if (bsds->fd == -1) {
1641 errno = ENOTCONN;
1642 return -1;
1645 ret = tsocket_bsd_pending(bsds->fd);
1647 return ret;
/*
 * State of one pending readv request.
 */
struct tstream_bsd_readv_state {
	/* the stream the request operates on */
	struct tstream_context *stream;

	/* private copy of the caller's vector; advanced as data arrives */
	struct iovec *vector;
	/* number of entries still left in vector */
	size_t count;

	/* total number of bytes read so far */
	int ret;
};
1659 static int tstream_bsd_readv_destructor(struct tstream_bsd_readv_state *state)
1661 struct tstream_bsd *bsds = tstream_context_data(state->stream,
1662 struct tstream_bsd);
1664 tstream_bsd_set_readable_handler(bsds, NULL, NULL, NULL);
1666 return 0;
1669 static void tstream_bsd_readv_handler(void *private_data);
1671 static struct tevent_req *tstream_bsd_readv_send(TALLOC_CTX *mem_ctx,
1672 struct tevent_context *ev,
1673 struct tstream_context *stream,
1674 struct iovec *vector,
1675 size_t count)
1677 struct tevent_req *req;
1678 struct tstream_bsd_readv_state *state;
1679 struct tstream_bsd *bsds = tstream_context_data(stream, struct tstream_bsd);
1680 int ret;
1682 req = tevent_req_create(mem_ctx, &state,
1683 struct tstream_bsd_readv_state);
1684 if (!req) {
1685 return NULL;
1688 state->stream = stream;
1689 /* we make a copy of the vector so that we can modify it */
1690 state->vector = talloc_array(state, struct iovec, count);
1691 if (tevent_req_nomem(state->vector, req)) {
1692 goto post;
1694 memcpy(state->vector, vector, sizeof(struct iovec)*count);
1695 state->count = count;
1696 state->ret = 0;
1698 talloc_set_destructor(state, tstream_bsd_readv_destructor);
1700 if (bsds->fd == -1) {
1701 tevent_req_error(req, ENOTCONN);
1702 goto post;
1706 * this is a fast path, not waiting for the
1707 * socket to become explicit readable gains
1708 * about 10%-20% performance in benchmark tests.
1710 if (bsds->optimize_readv) {
1712 * We only do the optimization on
1713 * readv if the caller asked for it.
1715 * This is needed because in most cases
1716 * we preferr to flush send buffers before
1717 * receiving incoming requests.
1719 tstream_bsd_readv_handler(req);
1720 if (!tevent_req_is_in_progress(req)) {
1721 goto post;
1725 ret = tstream_bsd_set_readable_handler(bsds, ev,
1726 tstream_bsd_readv_handler,
1727 req);
1728 if (ret == -1) {
1729 tevent_req_error(req, errno);
1730 goto post;
1733 return req;
1735 post:
1736 tevent_req_post(req, ev);
1737 return req;
1740 static void tstream_bsd_readv_handler(void *private_data)
1742 struct tevent_req *req = talloc_get_type_abort(private_data,
1743 struct tevent_req);
1744 struct tstream_bsd_readv_state *state = tevent_req_data(req,
1745 struct tstream_bsd_readv_state);
1746 struct tstream_context *stream = state->stream;
1747 struct tstream_bsd *bsds = tstream_context_data(stream, struct tstream_bsd);
1748 int ret;
1749 int err;
1750 bool retry;
1752 ret = readv(bsds->fd, state->vector, state->count);
1753 if (ret == 0) {
1754 /* propagate end of file */
1755 tevent_req_error(req, EPIPE);
1756 return;
1758 err = tsocket_bsd_error_from_errno(ret, errno, &retry);
1759 if (retry) {
1760 /* retry later */
1761 return;
1763 if (tevent_req_error(req, err)) {
1764 return;
1767 state->ret += ret;
1769 while (ret > 0) {
1770 if (ret < state->vector[0].iov_len) {
1771 uint8_t *base;
1772 base = (uint8_t *)state->vector[0].iov_base;
1773 base += ret;
1774 state->vector[0].iov_base = (void *)base;
1775 state->vector[0].iov_len -= ret;
1776 break;
1778 ret -= state->vector[0].iov_len;
1779 state->vector += 1;
1780 state->count -= 1;
1784 * there're maybe some empty vectors at the end
1785 * which we need to skip, otherwise we would get
1786 * ret == 0 from the readv() call and return EPIPE
1788 while (state->count > 0) {
1789 if (state->vector[0].iov_len > 0) {
1790 break;
1792 state->vector += 1;
1793 state->count -= 1;
1796 if (state->count > 0) {
1797 /* we have more to read */
1798 return;
1801 tevent_req_done(req);
1804 static int tstream_bsd_readv_recv(struct tevent_req *req,
1805 int *perrno)
1807 struct tstream_bsd_readv_state *state = tevent_req_data(req,
1808 struct tstream_bsd_readv_state);
1809 int ret;
1811 ret = tsocket_simple_int_recv(req, perrno);
1812 if (ret == 0) {
1813 ret = state->ret;
1816 tevent_req_received(req);
1817 return ret;
/*
 * State of one pending writev request.
 */
struct tstream_bsd_writev_state {
	/* the stream the request operates on */
	struct tstream_context *stream;

	/* private copy of the caller's vector; advanced as data is sent */
	struct iovec *vector;
	/* number of entries still left in vector */
	size_t count;

	/* total number of bytes written so far */
	int ret;
};
1829 static int tstream_bsd_writev_destructor(struct tstream_bsd_writev_state *state)
1831 struct tstream_bsd *bsds = tstream_context_data(state->stream,
1832 struct tstream_bsd);
1834 tstream_bsd_set_writeable_handler(bsds, NULL, NULL, NULL);
1836 return 0;
1839 static void tstream_bsd_writev_handler(void *private_data);
1841 static struct tevent_req *tstream_bsd_writev_send(TALLOC_CTX *mem_ctx,
1842 struct tevent_context *ev,
1843 struct tstream_context *stream,
1844 const struct iovec *vector,
1845 size_t count)
1847 struct tevent_req *req;
1848 struct tstream_bsd_writev_state *state;
1849 struct tstream_bsd *bsds = tstream_context_data(stream, struct tstream_bsd);
1850 int ret;
1852 req = tevent_req_create(mem_ctx, &state,
1853 struct tstream_bsd_writev_state);
1854 if (!req) {
1855 return NULL;
1858 state->stream = stream;
1859 /* we make a copy of the vector so that we can modify it */
1860 state->vector = talloc_array(state, struct iovec, count);
1861 if (tevent_req_nomem(state->vector, req)) {
1862 goto post;
1864 memcpy(state->vector, vector, sizeof(struct iovec)*count);
1865 state->count = count;
1866 state->ret = 0;
1868 talloc_set_destructor(state, tstream_bsd_writev_destructor);
1870 if (bsds->fd == -1) {
1871 tevent_req_error(req, ENOTCONN);
1872 goto post;
1876 * this is a fast path, not waiting for the
1877 * socket to become explicit writeable gains
1878 * about 10%-20% performance in benchmark tests.
1880 tstream_bsd_writev_handler(req);
1881 if (!tevent_req_is_in_progress(req)) {
1882 goto post;
1885 ret = tstream_bsd_set_writeable_handler(bsds, ev,
1886 tstream_bsd_writev_handler,
1887 req);
1888 if (ret == -1) {
1889 tevent_req_error(req, errno);
1890 goto post;
1893 return req;
1895 post:
1896 tevent_req_post(req, ev);
1897 return req;
1900 static void tstream_bsd_writev_handler(void *private_data)
1902 struct tevent_req *req = talloc_get_type_abort(private_data,
1903 struct tevent_req);
1904 struct tstream_bsd_writev_state *state = tevent_req_data(req,
1905 struct tstream_bsd_writev_state);
1906 struct tstream_context *stream = state->stream;
1907 struct tstream_bsd *bsds = tstream_context_data(stream, struct tstream_bsd);
1908 ssize_t ret;
1909 int err;
1910 bool retry;
1912 ret = writev(bsds->fd, state->vector, state->count);
1913 if (ret == 0) {
1914 /* propagate end of file */
1915 tevent_req_error(req, EPIPE);
1916 return;
1918 err = tsocket_bsd_error_from_errno(ret, errno, &retry);
1919 if (retry) {
1920 /* retry later */
1921 return;
1923 if (tevent_req_error(req, err)) {
1924 return;
1927 state->ret += ret;
1929 while (ret > 0) {
1930 if (ret < state->vector[0].iov_len) {
1931 uint8_t *base;
1932 base = (uint8_t *)state->vector[0].iov_base;
1933 base += ret;
1934 state->vector[0].iov_base = (void *)base;
1935 state->vector[0].iov_len -= ret;
1936 break;
1938 ret -= state->vector[0].iov_len;
1939 state->vector += 1;
1940 state->count -= 1;
1944 * there're maybe some empty vectors at the end
1945 * which we need to skip, otherwise we would get
1946 * ret == 0 from the writev() call and return EPIPE
1948 while (state->count > 0) {
1949 if (state->vector[0].iov_len > 0) {
1950 break;
1952 state->vector += 1;
1953 state->count -= 1;
1956 if (state->count > 0) {
1957 /* we have more to read */
1958 return;
1961 tevent_req_done(req);
1964 static int tstream_bsd_writev_recv(struct tevent_req *req, int *perrno)
1966 struct tstream_bsd_writev_state *state = tevent_req_data(req,
1967 struct tstream_bsd_writev_state);
1968 int ret;
1970 ret = tsocket_simple_int_recv(req, perrno);
1971 if (ret == 0) {
1972 ret = state->ret;
1975 tevent_req_received(req);
1976 return ret;
/*
 * State of a disconnect request.  The request carries no data of its
 * own; the single member only keeps the struct non-empty.
 */
struct tstream_bsd_disconnect_state {
	void *__dummy;
};
1983 static struct tevent_req *tstream_bsd_disconnect_send(TALLOC_CTX *mem_ctx,
1984 struct tevent_context *ev,
1985 struct tstream_context *stream)
1987 struct tstream_bsd *bsds = tstream_context_data(stream, struct tstream_bsd);
1988 struct tevent_req *req;
1989 struct tstream_bsd_disconnect_state *state;
1990 int ret;
1991 int err;
1992 bool dummy;
1994 req = tevent_req_create(mem_ctx, &state,
1995 struct tstream_bsd_disconnect_state);
1996 if (req == NULL) {
1997 return NULL;
2000 if (bsds->fd == -1) {
2001 tevent_req_error(req, ENOTCONN);
2002 goto post;
2005 TALLOC_FREE(bsds->fde);
2006 ret = close(bsds->fd);
2007 bsds->fd = -1;
2008 err = tsocket_bsd_error_from_errno(ret, errno, &dummy);
2009 if (tevent_req_error(req, err)) {
2010 goto post;
2013 tevent_req_done(req);
2014 post:
2015 tevent_req_post(req, ev);
2016 return req;
/*
 * Collect the result of a disconnect request: returns whatever
 * tsocket_simple_int_recv() reports (it also fills *perrno) and marks
 * the request as received.
 */
static int tstream_bsd_disconnect_recv(struct tevent_req *req,
				      int *perrno)
{
	int result = tsocket_simple_int_recv(req, perrno);

	tevent_req_received(req);
	return result;
}
2030 static const struct tstream_context_ops tstream_bsd_ops = {
2031 .name = "bsd",
2033 .pending_bytes = tstream_bsd_pending_bytes,
2035 .readv_send = tstream_bsd_readv_send,
2036 .readv_recv = tstream_bsd_readv_recv,
2038 .writev_send = tstream_bsd_writev_send,
2039 .writev_recv = tstream_bsd_writev_recv,
2041 .disconnect_send = tstream_bsd_disconnect_send,
2042 .disconnect_recv = tstream_bsd_disconnect_recv,
2045 static int tstream_bsd_destructor(struct tstream_bsd *bsds)
2047 TALLOC_FREE(bsds->fde);
2048 if (bsds->fd != -1) {
2049 close(bsds->fd);
2050 bsds->fd = -1;
2052 return 0;
2055 int _tstream_bsd_existing_socket(TALLOC_CTX *mem_ctx,
2056 int fd,
2057 struct tstream_context **_stream,
2058 const char *location)
2060 struct tstream_context *stream;
2061 struct tstream_bsd *bsds;
2063 stream = tstream_context_create(mem_ctx,
2064 &tstream_bsd_ops,
2065 &bsds,
2066 struct tstream_bsd,
2067 location);
2068 if (!stream) {
2069 return -1;
2071 ZERO_STRUCTP(bsds);
2072 bsds->fd = fd;
2073 talloc_set_destructor(bsds, tstream_bsd_destructor);
2075 *_stream = stream;
2076 return 0;
/*
 * State of one pending connect request.
 */
struct tstream_bsd_connect_state {
	/* socket being connected, -1 while none is open */
	int fd;
	/* fd event used while the connect is still in progress */
	struct tevent_fd *fde;
	/*
	 * Not referenced by the connect code in this file.  The type
	 * name was previously misspelled ("tstream_conext"), which
	 * declared an unrelated incomplete type.
	 */
	struct tstream_context *stream;
	/* for inet sockets: receives the local address after connecting */
	struct tsocket_address *local;
};
2086 static int tstream_bsd_connect_destructor(struct tstream_bsd_connect_state *state)
2088 TALLOC_FREE(state->fde);
2089 if (state->fd != -1) {
2090 close(state->fd);
2091 state->fd = -1;
2094 return 0;
2097 static void tstream_bsd_connect_fde_handler(struct tevent_context *ev,
2098 struct tevent_fd *fde,
2099 uint16_t flags,
2100 void *private_data);
2102 static struct tevent_req *tstream_bsd_connect_send(TALLOC_CTX *mem_ctx,
2103 struct tevent_context *ev,
2104 int sys_errno,
2105 const struct tsocket_address *local,
2106 const struct tsocket_address *remote)
2108 struct tevent_req *req;
2109 struct tstream_bsd_connect_state *state;
2110 struct tsocket_address_bsd *lbsda =
2111 talloc_get_type_abort(local->private_data,
2112 struct tsocket_address_bsd);
2113 struct tsocket_address_bsd *lrbsda = NULL;
2114 struct tsocket_address_bsd *rbsda =
2115 talloc_get_type_abort(remote->private_data,
2116 struct tsocket_address_bsd);
2117 int ret;
2118 int err;
2119 bool retry;
2120 bool do_bind = false;
2121 bool do_reuseaddr = false;
2122 bool do_ipv6only = false;
2123 bool is_inet = false;
2124 int sa_fam = lbsda->u.sa.sa_family;
2126 req = tevent_req_create(mem_ctx, &state,
2127 struct tstream_bsd_connect_state);
2128 if (!req) {
2129 return NULL;
2131 state->fd = -1;
2132 state->fde = NULL;
2134 talloc_set_destructor(state, tstream_bsd_connect_destructor);
2136 /* give the wrappers a chance to report an error */
2137 if (sys_errno != 0) {
2138 tevent_req_error(req, sys_errno);
2139 goto post;
2142 switch (lbsda->u.sa.sa_family) {
2143 case AF_UNIX:
2144 if (lbsda->u.un.sun_path[0] != 0) {
2145 do_reuseaddr = true;
2146 do_bind = true;
2148 break;
2149 case AF_INET:
2150 if (lbsda->u.in.sin_port != 0) {
2151 do_reuseaddr = true;
2152 do_bind = true;
2154 if (lbsda->u.in.sin_addr.s_addr != INADDR_ANY) {
2155 do_bind = true;
2157 is_inet = true;
2158 break;
2159 #ifdef HAVE_IPV6
2160 case AF_INET6:
2161 if (lbsda->u.in6.sin6_port != 0) {
2162 do_reuseaddr = true;
2163 do_bind = true;
2165 if (memcmp(&in6addr_any,
2166 &lbsda->u.in6.sin6_addr,
2167 sizeof(in6addr_any)) != 0) {
2168 do_bind = true;
2170 is_inet = true;
2171 do_ipv6only = true;
2172 break;
2173 #endif
2174 default:
2175 tevent_req_error(req, EINVAL);
2176 goto post;
2179 if (!do_bind && is_inet) {
2180 sa_fam = rbsda->u.sa.sa_family;
2181 switch (sa_fam) {
2182 case AF_INET:
2183 do_ipv6only = false;
2184 break;
2185 #ifdef HAVE_IPV6
2186 case AF_INET6:
2187 do_ipv6only = true;
2188 break;
2189 #endif
2193 if (is_inet) {
2194 state->local = tsocket_address_create(state,
2195 &tsocket_address_bsd_ops,
2196 &lrbsda,
2197 struct tsocket_address_bsd,
2198 __location__ "bsd_connect");
2199 if (tevent_req_nomem(state->local, req)) {
2200 goto post;
2203 ZERO_STRUCTP(lrbsda);
2204 lrbsda->sa_socklen = sizeof(lrbsda->u.ss);
2205 #ifdef HAVE_STRUCT_SOCKADDR_SA_LEN
2206 lrbsda->u.sa.sa_len = lrbsda->sa_socklen;
2207 #endif
2210 state->fd = socket(sa_fam, SOCK_STREAM, 0);
2211 if (state->fd == -1) {
2212 tevent_req_error(req, errno);
2213 goto post;
2216 state->fd = tsocket_bsd_common_prepare_fd(state->fd, true);
2217 if (state->fd == -1) {
2218 tevent_req_error(req, errno);
2219 goto post;
2222 #ifdef HAVE_IPV6
2223 if (do_ipv6only) {
2224 int val = 1;
2226 ret = setsockopt(state->fd, IPPROTO_IPV6, IPV6_V6ONLY,
2227 (const void *)&val, sizeof(val));
2228 if (ret == -1) {
2229 tevent_req_error(req, errno);
2230 goto post;
2233 #endif
2235 if (do_reuseaddr) {
2236 int val = 1;
2238 ret = setsockopt(state->fd, SOL_SOCKET, SO_REUSEADDR,
2239 (const void *)&val, sizeof(val));
2240 if (ret == -1) {
2241 tevent_req_error(req, errno);
2242 goto post;
2246 if (do_bind) {
2247 ret = bind(state->fd, &lbsda->u.sa, lbsda->sa_socklen);
2248 if (ret == -1) {
2249 tevent_req_error(req, errno);
2250 goto post;
2254 if (rbsda->u.sa.sa_family != sa_fam) {
2255 tevent_req_error(req, EINVAL);
2256 goto post;
2259 ret = connect(state->fd, &rbsda->u.sa, rbsda->sa_socklen);
2260 err = tsocket_bsd_error_from_errno(ret, errno, &retry);
2261 if (retry) {
2262 /* retry later */
2263 goto async;
2265 if (tevent_req_error(req, err)) {
2266 goto post;
2269 if (!state->local) {
2270 tevent_req_done(req);
2271 goto post;
2274 ret = getsockname(state->fd, &lrbsda->u.sa, &lrbsda->sa_socklen);
2275 if (ret == -1) {
2276 tevent_req_error(req, errno);
2277 goto post;
2280 tevent_req_done(req);
2281 goto post;
2283 async:
2284 state->fde = tevent_add_fd(ev, state,
2285 state->fd,
2286 TEVENT_FD_READ | TEVENT_FD_WRITE,
2287 tstream_bsd_connect_fde_handler,
2288 req);
2289 if (tevent_req_nomem(state->fde, req)) {
2290 goto post;
2293 return req;
2295 post:
2296 tevent_req_post(req, ev);
2297 return req;
2300 static void tstream_bsd_connect_fde_handler(struct tevent_context *ev,
2301 struct tevent_fd *fde,
2302 uint16_t flags,
2303 void *private_data)
2305 struct tevent_req *req = talloc_get_type_abort(private_data,
2306 struct tevent_req);
2307 struct tstream_bsd_connect_state *state = tevent_req_data(req,
2308 struct tstream_bsd_connect_state);
2309 struct tsocket_address_bsd *lrbsda = NULL;
2310 int ret;
2311 int error=0;
2312 socklen_t len = sizeof(error);
2313 int err;
2314 bool retry;
2316 ret = getsockopt(state->fd, SOL_SOCKET, SO_ERROR, &error, &len);
2317 if (ret == 0) {
2318 if (error != 0) {
2319 errno = error;
2320 ret = -1;
2323 err = tsocket_bsd_error_from_errno(ret, errno, &retry);
2324 if (retry) {
2325 /* retry later */
2326 return;
2328 if (tevent_req_error(req, err)) {
2329 return;
2332 if (!state->local) {
2333 tevent_req_done(req);
2334 return;
2337 lrbsda = talloc_get_type_abort(state->local->private_data,
2338 struct tsocket_address_bsd);
2340 ret = getsockname(state->fd, &lrbsda->u.sa, &lrbsda->sa_socklen);
2341 if (ret == -1) {
2342 tevent_req_error(req, errno);
2343 return;
2346 tevent_req_done(req);
2349 static int tstream_bsd_connect_recv(struct tevent_req *req,
2350 int *perrno,
2351 TALLOC_CTX *mem_ctx,
2352 struct tstream_context **stream,
2353 struct tsocket_address **local,
2354 const char *location)
2356 struct tstream_bsd_connect_state *state = tevent_req_data(req,
2357 struct tstream_bsd_connect_state);
2358 int ret;
2360 ret = tsocket_simple_int_recv(req, perrno);
2361 if (ret == 0) {
2362 ret = _tstream_bsd_existing_socket(mem_ctx,
2363 state->fd,
2364 stream,
2365 location);
2366 if (ret == -1) {
2367 *perrno = errno;
2368 goto done;
2370 TALLOC_FREE(state->fde);
2371 state->fd = -1;
2373 if (local) {
2374 *local = talloc_move(mem_ctx, &state->local);
2378 done:
2379 tevent_req_received(req);
2380 return ret;
2383 struct tevent_req * tstream_inet_tcp_connect_send(TALLOC_CTX *mem_ctx,
2384 struct tevent_context *ev,
2385 const struct tsocket_address *local,
2386 const struct tsocket_address *remote)
2388 struct tsocket_address_bsd *lbsda =
2389 talloc_get_type_abort(local->private_data,
2390 struct tsocket_address_bsd);
2391 struct tevent_req *req;
2392 int sys_errno = 0;
2394 switch (lbsda->u.sa.sa_family) {
2395 case AF_INET:
2396 break;
2397 #ifdef HAVE_IPV6
2398 case AF_INET6:
2399 break;
2400 #endif
2401 default:
2402 sys_errno = EINVAL;
2403 break;
2406 req = tstream_bsd_connect_send(mem_ctx, ev, sys_errno, local, remote);
2408 return req;
2411 int _tstream_inet_tcp_connect_recv(struct tevent_req *req,
2412 int *perrno,
2413 TALLOC_CTX *mem_ctx,
2414 struct tstream_context **stream,
2415 struct tsocket_address **local,
2416 const char *location)
2418 return tstream_bsd_connect_recv(req, perrno,
2419 mem_ctx, stream, local,
2420 location);
2423 struct tevent_req * tstream_unix_connect_send(TALLOC_CTX *mem_ctx,
2424 struct tevent_context *ev,
2425 const struct tsocket_address *local,
2426 const struct tsocket_address *remote)
2428 struct tsocket_address_bsd *lbsda =
2429 talloc_get_type_abort(local->private_data,
2430 struct tsocket_address_bsd);
2431 struct tevent_req *req;
2432 int sys_errno = 0;
2434 switch (lbsda->u.sa.sa_family) {
2435 case AF_UNIX:
2436 break;
2437 default:
2438 sys_errno = EINVAL;
2439 break;
2442 req = tstream_bsd_connect_send(mem_ctx, ev, sys_errno, local, remote);
2444 return req;
2447 int _tstream_unix_connect_recv(struct tevent_req *req,
2448 int *perrno,
2449 TALLOC_CTX *mem_ctx,
2450 struct tstream_context **stream,
2451 const char *location)
2453 return tstream_bsd_connect_recv(req, perrno,
2454 mem_ctx, stream, NULL,
2455 location);
2458 int _tstream_unix_socketpair(TALLOC_CTX *mem_ctx1,
2459 struct tstream_context **_stream1,
2460 TALLOC_CTX *mem_ctx2,
2461 struct tstream_context **_stream2,
2462 const char *location)
2464 int ret;
2465 int fds[2];
2466 int fd1;
2467 int fd2;
2468 struct tstream_context *stream1 = NULL;
2469 struct tstream_context *stream2 = NULL;
2471 ret = socketpair(AF_UNIX, SOCK_STREAM, 0, fds);
2472 if (ret == -1) {
2473 return -1;
2475 fd1 = fds[0];
2476 fd2 = fds[1];
2478 fd1 = tsocket_bsd_common_prepare_fd(fd1, true);
2479 if (fd1 == -1) {
2480 int sys_errno = errno;
2481 close(fd2);
2482 errno = sys_errno;
2483 return -1;
2486 fd2 = tsocket_bsd_common_prepare_fd(fd2, true);
2487 if (fd2 == -1) {
2488 int sys_errno = errno;
2489 close(fd1);
2490 errno = sys_errno;
2491 return -1;
2494 ret = _tstream_bsd_existing_socket(mem_ctx1,
2495 fd1,
2496 &stream1,
2497 location);
2498 if (ret == -1) {
2499 int sys_errno = errno;
2500 close(fd1);
2501 close(fd2);
2502 errno = sys_errno;
2503 return -1;
2506 ret = _tstream_bsd_existing_socket(mem_ctx2,
2507 fd2,
2508 &stream2,
2509 location);
2510 if (ret == -1) {
2511 int sys_errno = errno;
2512 talloc_free(stream1);
2513 close(fd2);
2514 errno = sys_errno;
2515 return -1;
2518 *_stream1 = stream1;
2519 *_stream2 = stream2;
2520 return 0;