s3:unix_msg: don't close the fd-array at the end of unix_dgram_send_job()
[Samba.git] / source3 / lib / unix_msg / unix_msg.c
blobc3bcb56b19246503a2265bfe035f5f97f4b4c959
1 /*
2 * Unix SMB/CIFS implementation.
3 * Copyright (C) Volker Lendecke 2013
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
19 #include "replace.h"
20 #include "unix_msg.h"
21 #include "system/select.h"
22 #include "system/time.h"
23 #include "system/network.h"
24 #include "dlinklist.h"
25 #include "pthreadpool/pthreadpool.h"
26 #include <fcntl.h>
/*
 * This file implements two abstractions: The "unix_dgram" functions implement
 * queueing for unix domain datagram sockets. You can send to a destination
 * socket, and if that has no free space available, it will fall back to an
 * anonymous socket that will poll for writability. "unix_dgram" expects the
 * data size not to exceed the system limit.
 *
 * The "unix_msg" functions implement the fragmentation of large messages on
 * top of "unix_dgram". This is what is exposed to the user of this API.
 */
/*
 * One datagram waiting in a per-destination send queue.
 *
 * The payload lives in the trailing "buf" array. When file descriptors are
 * attached, "fds" points at an int-aligned area that follows the payload
 * inside the same allocation (see queue_msg()).
 */
struct unix_dgram_msg {
	/* Doubly linked list pointers, maintained by the DLIST_* macros. */
	struct unix_dgram_msg *prev;
	struct unix_dgram_msg *next;

	int sock;		/* socket the send job writes to */
	ssize_t sent;		/* sendmsg() result stored by the send job */
	int sys_errno;		/* slot for an errno value of the send */
	size_t num_fds;		/* number of entries behind "fds" */
	int *fds;		/* NULL when num_fds == 0 */
	size_t buflen;		/* number of payload bytes in "buf" */
	uint8_t buf[];
};
/*
 * Queue of datagrams for one destination socket path. A queue owns a
 * socket of its own and the list of messages still to be sent.
 */
struct unix_dgram_send_queue {
	/* Doubly linked list pointers, maintained by the DLIST_* macros. */
	struct unix_dgram_send_queue *prev;
	struct unix_dgram_send_queue *next;

	struct unix_dgram_ctx *ctx;	/* context this queue belongs to */
	int sock;			/* socket connected to "path" */
	struct unix_dgram_msg *msgs;	/* pending messages, head is sent first */
	char path[];			/* NUL-terminated destination path */
};
/*
 * State of one datagram endpoint: the receiving socket, the user callback
 * and the per-destination send queues with their thread pool.
 */
struct unix_dgram_ctx {
	int sock;			/* the datagram socket itself */
	pid_t created_pid;		/* pid that bound "path", or (pid_t)-1 */
	const struct poll_funcs *ev_funcs;	/* event loop abstraction */
	size_t max_msg;			/* size of recv_buf */

	/* Invoked for every datagram that was received. */
	void (*recv_callback)(struct unix_dgram_ctx *ctx,
			      uint8_t *msg, size_t msg_len,
			      int *fds, size_t num_fds,
			      void *private_data);
	void *private_data;		/* passed through to recv_callback */

	struct poll_watch *sock_read_watch;	/* POLLIN watch on "sock" */
	struct unix_dgram_send_queue *send_queues;	/* list of busy queues */

	struct pthreadpool *send_pool;		/* created on first queued send */
	struct poll_watch *pool_read_watch;	/* watch on the pool's signal fd */

	uint8_t *recv_buf;		/* max_msg bytes of receive space */
	char path[];			/* NUL-terminated socket path, may be "" */
};
/* Prototypes for functions that are used before they are defined. */
static ssize_t iov_buflen(const struct iovec *iov, int iovlen);
static void unix_dgram_recv_handler(struct poll_watch *w, int fd, short events,
				    void *private_data);
/*
 * Switch a descriptor to non-blocking mode.
 *
 * Returns 0 on success, otherwise the errno of the failing fcntl() call.
 */
static int prepare_socket_nonblock(int sock)
{
	/* Pick whichever spelling of the non-blocking flag the platform has. */
#if defined(O_NONBLOCK)
	const int nonblock_flag = O_NONBLOCK;
#elif defined(SYSV)
	const int nonblock_flag = O_NDELAY;
#else /* BSD */
	const int nonblock_flag = FNDELAY;
#endif
	int status_flags = fcntl(sock, F_GETFL);

	if (status_flags == -1) {
		return errno;
	}
	if (fcntl(sock, F_SETFL, status_flags | nonblock_flag) == -1) {
		return errno;
	}
	return 0;
}
/*
 * Mark a descriptor close-on-exec. This is a no-op on platforms that do
 * not define FD_CLOEXEC.
 *
 * Returns 0 on success, otherwise the errno of the failing fcntl() call.
 */
static int prepare_socket_cloexec(int sock)
{
#ifdef FD_CLOEXEC
	int fd_flags = fcntl(sock, F_GETFD, 0);

	if (fd_flags == -1) {
		return errno;
	}
	if (fcntl(sock, F_SETFD, fd_flags | FD_CLOEXEC) == -1) {
		return errno;
	}
#endif
	return 0;
}
/*
 * Make a socket both non-blocking and close-on-exec.
 *
 * Returns 0 on success or the errno reported by the first step that failed;
 * close-on-exec is not attempted when setting non-blocking mode failed.
 */
static int prepare_socket(int sock)
{
	int err = prepare_socket_nonblock(sock);

	if (err != 0) {
		return err;
	}
	err = prepare_socket_cloexec(sock);
	return err;
}
/*
 * Close every valid descriptor in fds[0..num_fds-1] and overwrite it with
 * -1. Slots that already contain -1 are skipped, so calling this twice on
 * the same array is harmless.
 */
static void close_fd_array(int *fds, size_t num_fds)
{
	size_t idx = 0;

	while (idx < num_fds) {
		int *slot = &fds[idx];

		idx += 1;
		if (*slot != -1) {
			close(*slot);
			*slot = -1;
		}
	}
}
155 static int unix_dgram_init(const struct sockaddr_un *addr, size_t max_msg,
156 const struct poll_funcs *ev_funcs,
157 void (*recv_callback)(struct unix_dgram_ctx *ctx,
158 uint8_t *msg, size_t msg_len,
159 int *fds, size_t num_fds,
160 void *private_data),
161 void *private_data,
162 struct unix_dgram_ctx **result)
164 struct unix_dgram_ctx *ctx;
165 size_t pathlen;
166 int ret;
168 if (addr != NULL) {
169 pathlen = strlen(addr->sun_path)+1;
170 } else {
171 pathlen = 1;
174 ctx = malloc(offsetof(struct unix_dgram_ctx, path) + pathlen);
175 if (ctx == NULL) {
176 return ENOMEM;
178 if (addr != NULL) {
179 memcpy(ctx->path, addr->sun_path, pathlen);
180 } else {
181 ctx->path[0] = '\0';
184 *ctx = (struct unix_dgram_ctx) {
185 .max_msg = max_msg,
186 .ev_funcs = ev_funcs,
187 .recv_callback = recv_callback,
188 .private_data = private_data,
189 .created_pid = (pid_t)-1
192 ctx->recv_buf = malloc(max_msg);
193 if (ctx->recv_buf == NULL) {
194 free(ctx);
195 return ENOMEM;
198 ctx->sock = socket(AF_UNIX, SOCK_DGRAM, 0);
199 if (ctx->sock == -1) {
200 ret = errno;
201 goto fail_free;
204 /* Set non-blocking and close-on-exec. */
205 ret = prepare_socket(ctx->sock);
206 if (ret != 0) {
207 goto fail_close;
210 if (addr != NULL) {
211 ret = bind(ctx->sock,
212 (const struct sockaddr *)(const void *)addr,
213 sizeof(*addr));
214 if (ret == -1) {
215 ret = errno;
216 goto fail_close;
219 ctx->created_pid = getpid();
221 ctx->sock_read_watch = ctx->ev_funcs->watch_new(
222 ctx->ev_funcs, ctx->sock, POLLIN,
223 unix_dgram_recv_handler, ctx);
225 if (ctx->sock_read_watch == NULL) {
226 ret = ENOMEM;
227 goto fail_close;
231 *result = ctx;
232 return 0;
234 fail_close:
235 close(ctx->sock);
236 fail_free:
237 free(ctx->recv_buf);
238 free(ctx);
239 return ret;
242 static void unix_dgram_recv_handler(struct poll_watch *w, int fd, short events,
243 void *private_data)
245 struct unix_dgram_ctx *ctx = (struct unix_dgram_ctx *)private_data;
246 ssize_t received;
247 int flags = 0;
248 struct msghdr msg;
249 struct iovec iov;
250 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
251 char buf[CMSG_SPACE(sizeof(int)*INT8_MAX)] = { 0, };
252 struct cmsghdr *cmsg;
253 #endif /* HAVE_STRUCT_MSGHDR_MSG_CONTROL */
254 int *fds = NULL;
255 size_t i, num_fds = 0;
257 iov = (struct iovec) {
258 .iov_base = (void *)ctx->recv_buf,
259 .iov_len = ctx->max_msg,
262 msg = (struct msghdr) {
263 .msg_iov = &iov,
264 .msg_iovlen = 1,
265 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
266 .msg_control = buf,
267 .msg_controllen = sizeof(buf),
268 #endif
271 #ifdef MSG_CMSG_CLOEXEC
272 flags |= MSG_CMSG_CLOEXEC;
273 #endif
275 received = recvmsg(fd, &msg, flags);
276 if (received == -1) {
277 if ((errno == EAGAIN) ||
278 (errno == EWOULDBLOCK) ||
279 (errno == EINTR) || (errno == ENOMEM)) {
280 /* Not really an error - just try again. */
281 return;
283 /* Problem with the socket. Set it unreadable. */
284 ctx->ev_funcs->watch_update(w, 0);
285 return;
287 if (received > ctx->max_msg) {
288 /* More than we expected, not for us */
289 return;
292 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
293 for(cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL;
294 cmsg = CMSG_NXTHDR(&msg, cmsg))
296 void *data = CMSG_DATA(cmsg);
298 if (cmsg->cmsg_type != SCM_RIGHTS) {
299 continue;
301 if (cmsg->cmsg_level != SOL_SOCKET) {
302 continue;
305 fds = (int *)data;
306 num_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof (int);
307 break;
309 #endif
311 for (i = 0; i < num_fds; i++) {
312 int err;
314 err = prepare_socket_cloexec(fds[i]);
315 if (err != 0) {
316 goto cleanup_fds;
320 ctx->recv_callback(ctx, ctx->recv_buf, received,
321 fds, num_fds, ctx->private_data);
322 return;
324 cleanup_fds:
325 close_fd_array(fds, num_fds);
327 ctx->recv_callback(ctx, ctx->recv_buf, received,
328 NULL, 0, ctx->private_data);
331 static void unix_dgram_job_finished(struct poll_watch *w, int fd, short events,
332 void *private_data);
334 static int unix_dgram_init_pthreadpool(struct unix_dgram_ctx *ctx)
336 int ret, signalfd;
338 if (ctx->send_pool != NULL) {
339 return 0;
342 ret = pthreadpool_init(0, &ctx->send_pool);
343 if (ret != 0) {
344 return ret;
347 signalfd = pthreadpool_signal_fd(ctx->send_pool);
349 ctx->pool_read_watch = ctx->ev_funcs->watch_new(
350 ctx->ev_funcs, signalfd, POLLIN,
351 unix_dgram_job_finished, ctx);
352 if (ctx->pool_read_watch == NULL) {
353 pthreadpool_destroy(ctx->send_pool);
354 ctx->send_pool = NULL;
355 return ENOMEM;
358 return 0;
361 static int unix_dgram_send_queue_init(
362 struct unix_dgram_ctx *ctx, const struct sockaddr_un *dst,
363 struct unix_dgram_send_queue **result)
365 struct unix_dgram_send_queue *q;
366 size_t pathlen;
367 int ret, err;
369 pathlen = strlen(dst->sun_path)+1;
371 q = malloc(offsetof(struct unix_dgram_send_queue, path) + pathlen);
372 if (q == NULL) {
373 return ENOMEM;
375 q->ctx = ctx;
376 q->msgs = NULL;
377 memcpy(q->path, dst->sun_path, pathlen);
379 q->sock = socket(AF_UNIX, SOCK_DGRAM, 0);
380 if (q->sock == -1) {
381 err = errno;
382 goto fail_free;
385 err = prepare_socket_cloexec(q->sock);
386 if (err != 0) {
387 goto fail_close;
390 do {
391 ret = connect(q->sock,
392 (const struct sockaddr *)(const void *)dst,
393 sizeof(*dst));
394 } while ((ret == -1) && (errno == EINTR));
396 if (ret == -1) {
397 err = errno;
398 goto fail_close;
401 err = unix_dgram_init_pthreadpool(ctx);
402 if (err != 0) {
403 goto fail_close;
406 DLIST_ADD(ctx->send_queues, q);
408 *result = q;
409 return 0;
411 fail_close:
412 close(q->sock);
413 fail_free:
414 free(q);
415 return err;
418 static void unix_dgram_send_queue_free(struct unix_dgram_send_queue *q)
420 struct unix_dgram_ctx *ctx = q->ctx;
422 while (q->msgs != NULL) {
423 struct unix_dgram_msg *msg;
424 msg = q->msgs;
425 DLIST_REMOVE(q->msgs, msg);
426 close_fd_array(msg->fds, msg->num_fds);
427 free(msg);
429 close(q->sock);
430 DLIST_REMOVE(ctx->send_queues, q);
431 free(q);
434 static struct unix_dgram_send_queue *find_send_queue(
435 struct unix_dgram_ctx *ctx, const char *dst_sock)
437 struct unix_dgram_send_queue *s;
439 for (s = ctx->send_queues; s != NULL; s = s->next) {
440 if (strcmp(s->path, dst_sock) == 0) {
441 return s;
444 return NULL;
447 static int queue_msg(struct unix_dgram_send_queue *q,
448 const struct iovec *iov, int iovlen,
449 const int *fds, size_t num_fds)
451 struct unix_dgram_msg *msg;
452 ssize_t buflen;
453 size_t msglen;
454 size_t fds_size = sizeof(int) * num_fds;
455 int fds_copy[MIN(num_fds, INT8_MAX)];
456 size_t fds_padding = 0;
457 int i;
458 size_t tmp;
459 int ret = -1;
461 if (num_fds > INT8_MAX) {
462 return EINVAL;
465 for (i = 0; i < num_fds; i++) {
466 fds_copy[i] = -1;
469 for (i = 0; i < num_fds; i++) {
470 fds_copy[i] = dup(fds[i]);
471 if (fds_copy[i] == -1) {
472 ret = errno;
473 goto fail;
477 buflen = iov_buflen(iov, iovlen);
478 if (buflen == -1) {
479 goto invalid;
482 msglen = offsetof(struct unix_dgram_msg, buf);
483 tmp = msglen + buflen;
484 if ((tmp < msglen) || (tmp < buflen)) {
485 /* overflow */
486 goto invalid;
488 msglen = tmp;
490 if (num_fds > 0) {
491 const size_t fds_align = sizeof(int) - 1;
493 tmp = msglen + fds_align;
494 if ((tmp < msglen) || (tmp < fds_align)) {
495 /* overflow */
496 goto invalid;
498 tmp &= ~fds_align;
500 fds_padding = tmp - msglen;
501 msglen = tmp;
503 tmp = msglen + fds_size;
504 if ((tmp < msglen) || (tmp < fds_size)) {
505 /* overflow */
506 goto invalid;
508 msglen = tmp;
511 msg = malloc(msglen);
512 if (msg == NULL) {
513 ret = ENOMEM;
514 goto fail;
516 msg->buflen = buflen;
517 msg->sock = q->sock;
519 buflen = 0;
520 for (i=0; i<iovlen; i++) {
521 memcpy(&msg->buf[buflen], iov[i].iov_base, iov[i].iov_len);
522 buflen += iov[i].iov_len;
525 msg->num_fds = num_fds;
526 if (msg->num_fds > 0) {
527 void *fds_ptr = (void *)&msg->buf[buflen+fds_padding];
528 memcpy(fds_ptr, fds_copy, fds_size);
529 msg->fds = (int *)fds_ptr;
530 } else {
531 msg->fds = NULL;
534 DLIST_ADD_END(q->msgs, msg, struct unix_dgram_msg);
535 return 0;
537 invalid:
538 ret = EINVAL;
539 fail:
540 close_fd_array(fds_copy, num_fds);
541 return ret;
544 static void unix_dgram_send_job(void *private_data)
546 struct unix_dgram_msg *dmsg = private_data;
547 struct iovec iov = {
548 .iov_base = (void *)dmsg->buf,
549 .iov_len = dmsg->buflen,
551 struct msghdr msg = {
552 .msg_iov = &iov,
553 .msg_iovlen = 1,
555 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
556 struct cmsghdr *cmsg;
557 size_t fds_size = sizeof(int) * dmsg->num_fds;
558 size_t cmsg_len = CMSG_LEN(fds_size);
559 size_t cmsg_space = CMSG_SPACE(fds_size);
560 char cmsg_buf[cmsg_space];
562 if (dmsg->num_fds > 0) {
563 void *fdptr;
565 memset(cmsg_buf, 0, cmsg_space);
567 msg.msg_control = cmsg_buf;
568 msg.msg_controllen = cmsg_space;
569 cmsg = CMSG_FIRSTHDR(&msg);
570 cmsg->cmsg_level = SOL_SOCKET;
571 cmsg->cmsg_type = SCM_RIGHTS;
572 cmsg->cmsg_len = cmsg_len;
573 fdptr = CMSG_DATA(cmsg);
574 memcpy(fdptr, dmsg->fds, fds_size);
575 msg.msg_controllen = cmsg->cmsg_len;
577 #endif /* HAVE_STRUCT_MSGHDR_MSG_CONTROL */
579 do {
580 dmsg->sent = sendmsg(dmsg->sock, &msg, 0);
581 } while ((dmsg->sent == -1) && (errno == EINTR));
584 static void unix_dgram_job_finished(struct poll_watch *w, int fd, short events,
585 void *private_data)
587 struct unix_dgram_ctx *ctx = private_data;
588 struct unix_dgram_send_queue *q;
589 struct unix_dgram_msg *msg;
590 int ret, job;
592 ret = pthreadpool_finished_jobs(ctx->send_pool, &job, 1);
593 if (ret != 1) {
594 return;
597 for (q = ctx->send_queues; q != NULL; q = q->next) {
598 if (job == q->sock) {
599 break;
603 if (q == NULL) {
604 /* Huh? Should not happen */
605 return;
608 msg = q->msgs;
609 DLIST_REMOVE(q->msgs, msg);
610 close_fd_array(msg->fds, msg->num_fds);
611 free(msg);
613 if (q->msgs != NULL) {
614 ret = pthreadpool_add_job(ctx->send_pool, q->sock,
615 unix_dgram_send_job, q->msgs);
616 if (ret == 0) {
617 return;
621 unix_dgram_send_queue_free(q);
624 static int unix_dgram_send(struct unix_dgram_ctx *ctx,
625 const struct sockaddr_un *dst,
626 const struct iovec *iov, int iovlen,
627 const int *fds, size_t num_fds)
629 struct unix_dgram_send_queue *q;
630 struct msghdr msg;
631 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
632 struct cmsghdr *cmsg;
633 size_t fds_size = sizeof(int) * num_fds;
634 size_t cmsg_len = CMSG_LEN(fds_size);
635 size_t cmsg_space = CMSG_SPACE(fds_size);
636 char cmsg_buf[cmsg_space];
637 #endif /* HAVE_STRUCT_MSGHDR_MSG_CONTROL */
638 int ret;
639 int i;
641 if (num_fds > INT8_MAX) {
642 return EINVAL;
645 #ifndef HAVE_STRUCT_MSGHDR_MSG_CONTROL
646 if (num_fds > 0) {
647 return ENOSYS;
649 #endif /* ! HAVE_STRUCT_MSGHDR_MSG_CONTROL */
651 for (i = 0; i < num_fds; i++) {
653 * Make sure we only allow fd passing
654 * for communication channels,
655 * e.g. sockets, pipes, fifos, ...
657 ret = lseek(fds[i], 0, SEEK_CUR);
658 if (ret == -1 && errno == ESPIPE) {
659 /* ok */
660 continue;
664 * Reject the message as we may need to call dup(),
665 * if we queue the message.
667 * That might result in unexpected behavior for the caller
668 * for files and broken posix locking.
670 return EINVAL;
674 * To preserve message ordering, we have to queue a message when
675 * others are waiting in line already.
677 q = find_send_queue(ctx, dst->sun_path);
678 if (q != NULL) {
679 return queue_msg(q, iov, iovlen, fds, num_fds);
683 * Try a cheap nonblocking send
686 msg = (struct msghdr) {
687 .msg_name = discard_const_p(struct sockaddr_un, dst),
688 .msg_namelen = sizeof(*dst),
689 .msg_iov = discard_const_p(struct iovec, iov),
690 .msg_iovlen = iovlen
692 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
693 if (num_fds > 0) {
694 void *fdptr;
696 memset(cmsg_buf, 0, cmsg_space);
698 msg.msg_control = cmsg_buf;
699 msg.msg_controllen = cmsg_space;
700 cmsg = CMSG_FIRSTHDR(&msg);
701 cmsg->cmsg_level = SOL_SOCKET;
702 cmsg->cmsg_type = SCM_RIGHTS;
703 cmsg->cmsg_len = cmsg_len;
704 fdptr = CMSG_DATA(cmsg);
705 memcpy(fdptr, fds, fds_size);
706 msg.msg_controllen = cmsg->cmsg_len;
708 #endif /* HAVE_STRUCT_MSGHDR_MSG_CONTROL */
710 ret = sendmsg(ctx->sock, &msg, 0);
711 if (ret >= 0) {
712 return 0;
714 if ((errno != EWOULDBLOCK) && (errno != EAGAIN) && (errno != EINTR)) {
715 return errno;
718 ret = unix_dgram_send_queue_init(ctx, dst, &q);
719 if (ret != 0) {
720 return ret;
722 ret = queue_msg(q, iov, iovlen, fds, num_fds);
723 if (ret != 0) {
724 unix_dgram_send_queue_free(q);
725 return ret;
727 ret = pthreadpool_add_job(ctx->send_pool, q->sock,
728 unix_dgram_send_job, q->msgs);
729 if (ret != 0) {
730 unix_dgram_send_queue_free(q);
731 return ret;
733 return 0;
736 static int unix_dgram_sock(struct unix_dgram_ctx *ctx)
738 return ctx->sock;
741 static int unix_dgram_free(struct unix_dgram_ctx *ctx)
743 if (ctx->send_queues != NULL) {
744 return EBUSY;
747 if (ctx->send_pool != NULL) {
748 int ret = pthreadpool_destroy(ctx->send_pool);
749 if (ret != 0) {
750 return ret;
752 ctx->ev_funcs->watch_free(ctx->pool_read_watch);
755 ctx->ev_funcs->watch_free(ctx->sock_read_watch);
757 if (getpid() == ctx->created_pid) {
758 /* If we created it, unlink. Otherwise someone else might
759 * still have it open */
760 unlink(ctx->path);
763 close(ctx->sock);
764 free(ctx->recv_buf);
765 free(ctx);
766 return 0;
/*
 * Every message starts with a uint64_t cookie.
 *
 * A value of 0 indicates a single-fragment message which is complete in
 * itself. The data immediately follows the cookie.
 *
 * Every multi-fragment message has a cookie != 0 and starts with a cookie
 * followed by a struct unix_msg_hdr and then the data. The pid and sock
 * fields are used to assure uniqueness on the receiver side.
 */
/*
 * Header that follows the cookie in every fragment of a multi-fragment
 * message.
 */
struct unix_msg_hdr {
	size_t msglen;	/* total payload length of the complete message */
	pid_t pid;	/* pid of the sender */
	int sock;	/* socket number of the sender */
};
/*
 * A multi-fragment message while it is being reassembled on the receiving
 * side. It is allocated with offsetof(struct unix_msg, buf) + msglen
 * bytes.
 *
 * "buf" is a flexible array member on purpose: with a one-element array
 * sizeof(struct unix_msg) would exceed offsetof(struct unix_msg, buf), and
 * assigning a whole struct to an allocation made for a very short message
 * would write beyond its end.
 */
struct unix_msg {
	/* Doubly linked list pointers, maintained by the DLIST_* macros. */
	struct unix_msg *prev, *next;
	size_t msglen;		/* total length announced in the header */
	size_t received;	/* payload bytes collected so far */
	pid_t sender_pid;	/* identifies the sender together with ... */
	int sender_sock;	/* ... the sender's socket number */
	uint64_t cookie;	/* cookie shared by all fragments */
	uint8_t buf[];		/* msglen bytes of payload */
};
/*
 * Context of the fragmenting message layer that sits on top of a
 * unix_dgram context.
 */
struct unix_msg_ctx {
	struct unix_dgram_ctx *dgram;	/* underlying datagram context */
	size_t fragment_len;		/* maximum size of one datagram */
	uint64_t cookie;		/* cookie for the next fragmented message */

	/* Invoked for every complete message. */
	void (*recv_callback)(struct unix_msg_ctx *ctx,
			      uint8_t *msg, size_t msg_len,
			      int *fds, size_t num_fds,
			      void *private_data);
	void *private_data;		/* passed through to recv_callback */

	struct unix_msg *msgs;		/* messages still being reassembled */
};
810 static void unix_msg_recv(struct unix_dgram_ctx *dgram_ctx,
811 uint8_t *buf, size_t buflen,
812 int *fds, size_t num_fds,
813 void *private_data);
815 int unix_msg_init(const struct sockaddr_un *addr,
816 const struct poll_funcs *ev_funcs,
817 size_t fragment_len, uint64_t cookie,
818 void (*recv_callback)(struct unix_msg_ctx *ctx,
819 uint8_t *msg, size_t msg_len,
820 int *fds, size_t num_fds,
821 void *private_data),
822 void *private_data,
823 struct unix_msg_ctx **result)
825 struct unix_msg_ctx *ctx;
826 int ret;
828 ctx = malloc(sizeof(*ctx));
829 if (ctx == NULL) {
830 return ENOMEM;
833 *ctx = (struct unix_msg_ctx) {
834 .fragment_len = fragment_len,
835 .cookie = cookie,
836 .recv_callback = recv_callback,
837 .private_data = private_data
840 ret = unix_dgram_init(addr, fragment_len, ev_funcs,
841 unix_msg_recv, ctx, &ctx->dgram);
842 if (ret != 0) {
843 free(ctx);
844 return ret;
847 *result = ctx;
848 return 0;
851 int unix_msg_send(struct unix_msg_ctx *ctx, const struct sockaddr_un *dst,
852 const struct iovec *iov, int iovlen,
853 const int *fds, size_t num_fds)
855 ssize_t msglen;
856 size_t sent;
857 int ret = 0;
858 struct iovec iov_copy[iovlen+2];
859 struct unix_msg_hdr hdr;
860 struct iovec src_iov;
862 if (iovlen < 0) {
863 return EINVAL;
866 msglen = iov_buflen(iov, iovlen);
867 if (msglen == -1) {
868 return EINVAL;
871 #ifndef HAVE_STRUCT_MSGHDR_MSG_CONTROL
872 if (num_fds > 0) {
873 return ENOSYS;
875 #endif /* ! HAVE_STRUCT_MSGHDR_MSG_CONTROL */
877 if (num_fds > INT8_MAX) {
878 return EINVAL;
881 if (msglen <= (ctx->fragment_len - sizeof(uint64_t))) {
882 uint64_t cookie = 0;
884 iov_copy[0].iov_base = &cookie;
885 iov_copy[0].iov_len = sizeof(cookie);
886 if (iovlen > 0) {
887 memcpy(&iov_copy[1], iov,
888 sizeof(struct iovec) * iovlen);
891 return unix_dgram_send(ctx->dgram, dst, iov_copy, iovlen+1,
892 fds, num_fds);
895 hdr = (struct unix_msg_hdr) {
896 .msglen = msglen,
897 .pid = getpid(),
898 .sock = unix_dgram_sock(ctx->dgram)
901 iov_copy[0].iov_base = &ctx->cookie;
902 iov_copy[0].iov_len = sizeof(ctx->cookie);
903 iov_copy[1].iov_base = &hdr;
904 iov_copy[1].iov_len = sizeof(hdr);
906 sent = 0;
907 src_iov = iov[0];
910 * The following write loop sends the user message in pieces. We have
911 * filled the first two iovecs above with "cookie" and "hdr". In the
912 * following loops we pull message chunks from the user iov array and
913 * fill iov_copy piece by piece, possibly truncating chunks from the
914 * caller's iov array. Ugly, but hopefully efficient.
917 while (sent < msglen) {
918 size_t fragment_len;
919 size_t iov_index = 2;
921 fragment_len = sizeof(ctx->cookie) + sizeof(hdr);
923 while (fragment_len < ctx->fragment_len) {
924 size_t space, chunk;
926 space = ctx->fragment_len - fragment_len;
927 chunk = MIN(space, src_iov.iov_len);
929 iov_copy[iov_index].iov_base = src_iov.iov_base;
930 iov_copy[iov_index].iov_len = chunk;
931 iov_index += 1;
933 src_iov.iov_base = (char *)src_iov.iov_base + chunk;
934 src_iov.iov_len -= chunk;
935 fragment_len += chunk;
937 if (src_iov.iov_len == 0) {
938 iov += 1;
939 iovlen -= 1;
940 if (iovlen == 0) {
941 break;
943 src_iov = iov[0];
946 sent += (fragment_len - sizeof(ctx->cookie) - sizeof(hdr));
949 * only the last fragment should pass the fd array.
950 * That simplifies the receiver a lot.
952 if (sent < msglen) {
953 ret = unix_dgram_send(ctx->dgram, dst,
954 iov_copy, iov_index,
955 NULL, 0);
956 } else {
957 ret = unix_dgram_send(ctx->dgram, dst,
958 iov_copy, iov_index,
959 fds, num_fds);
961 if (ret != 0) {
962 break;
966 ctx->cookie += 1;
967 if (ctx->cookie == 0) {
968 ctx->cookie += 1;
971 return ret;
974 static void unix_msg_recv(struct unix_dgram_ctx *dgram_ctx,
975 uint8_t *buf, size_t buflen,
976 int *fds, size_t num_fds,
977 void *private_data)
979 struct unix_msg_ctx *ctx = (struct unix_msg_ctx *)private_data;
980 struct unix_msg_hdr hdr;
981 struct unix_msg *msg;
982 size_t space;
983 uint64_t cookie;
985 if (buflen < sizeof(cookie)) {
986 goto close_fds;
989 memcpy(&cookie, buf, sizeof(cookie));
991 buf += sizeof(cookie);
992 buflen -= sizeof(cookie);
994 if (cookie == 0) {
995 ctx->recv_callback(ctx, buf, buflen, fds, num_fds, ctx->private_data);
996 return;
999 if (buflen < sizeof(hdr)) {
1000 goto close_fds;
1002 memcpy(&hdr, buf, sizeof(hdr));
1004 buf += sizeof(hdr);
1005 buflen -= sizeof(hdr);
1007 for (msg = ctx->msgs; msg != NULL; msg = msg->next) {
1008 if ((msg->sender_pid == hdr.pid) &&
1009 (msg->sender_sock == hdr.sock)) {
1010 break;
1014 if ((msg != NULL) && (msg->cookie != cookie)) {
1015 DLIST_REMOVE(ctx->msgs, msg);
1016 free(msg);
1017 msg = NULL;
1020 if (msg == NULL) {
1021 msg = malloc(offsetof(struct unix_msg, buf) + hdr.msglen);
1022 if (msg == NULL) {
1023 goto close_fds;
1025 *msg = (struct unix_msg) {
1026 .msglen = hdr.msglen,
1027 .sender_pid = hdr.pid,
1028 .sender_sock = hdr.sock,
1029 .cookie = cookie
1031 DLIST_ADD(ctx->msgs, msg);
1034 space = msg->msglen - msg->received;
1035 if (buflen > space) {
1036 goto close_fds;
1039 memcpy(msg->buf + msg->received, buf, buflen);
1040 msg->received += buflen;
1042 if (msg->received < msg->msglen) {
1043 goto close_fds;
1046 DLIST_REMOVE(ctx->msgs, msg);
1047 ctx->recv_callback(ctx, msg->buf, msg->msglen, fds, num_fds, ctx->private_data);
1048 free(msg);
1049 return;
1051 close_fds:
1052 close_fd_array(fds, num_fds);
1055 int unix_msg_free(struct unix_msg_ctx *ctx)
1057 int ret;
1059 ret = unix_dgram_free(ctx->dgram);
1060 if (ret != 0) {
1061 return ret;
1064 while (ctx->msgs != NULL) {
1065 struct unix_msg *msg = ctx->msgs;
1066 DLIST_REMOVE(ctx->msgs, msg);
1067 free(msg);
1070 free(ctx);
1071 return 0;
/*
 * Add up the lengths of iov[0..iovlen-1].
 *
 * Returns the total number of bytes, or -1 if the sum overflows size_t or
 * does not fit into the ssize_t return type. A non-positive iovlen yields
 * 0 without looking at iov.
 */
static ssize_t iov_buflen(const struct iovec *iov, int iovlen)
{
	size_t total = 0;
	int i;

	for (i = 0; i < iovlen; i++) {
		size_t len = iov[i].iov_len;
		size_t sum = total + len;

		if ((sum < total) || (sum < len)) {
			/* overflow */
			return -1;
		}
		total = sum;
	}

	/*
	 * A larger total would turn into some negative value when returned,
	 * which callers checking for -1 would take for a valid length.
	 * NOTE(review): assumes ssize_t is as wide as size_t, i.e. that
	 * SIZE_MAX / 2 equals SSIZE_MAX - confirm for unusual targets.
	 */
	if (total > (SIZE_MAX / 2)) {
		return -1;
	}
	return (ssize_t)total;
}