unix_msg: Lift sockaddr_un handling from unix_dgram_send
[Samba.git] / source3 / lib / unix_msg / unix_msg.c
blobb53a4c65a14ee51382c2d664e12f18018f31618e
/*
 * Unix SMB/CIFS implementation.
 * Copyright (C) Volker Lendecke 2013
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
#include "replace.h"
#include "unix_msg.h"
#include "system/select.h"
#include "system/time.h"
#include "system/network.h"
#include "dlinklist.h"
#include "pthreadpool/pthreadpool.h"
#include <fcntl.h>
/*
 * This file implements two abstractions: The "unix_dgram" functions implement
 * queueing for unix domain datagram sockets. You can send to a destination
 * socket, and if that has no free space available, it will fall back to an
 * anonymous socket that will poll for writability. "unix_dgram" expects the
 * data size not to exceed the system limit.
 *
 * The "unix_msg" functions implement the fragmentation of large messages on
 * top of "unix_dgram". This is what is exposed to the user of this API.
 */
39 struct unix_dgram_msg {
40 struct unix_dgram_msg *prev, *next;
42 int sock;
43 ssize_t sent;
44 int sys_errno;
45 size_t buflen;
46 uint8_t buf[1];
49 struct unix_dgram_send_queue {
50 struct unix_dgram_send_queue *prev, *next;
51 struct unix_dgram_ctx *ctx;
52 int sock;
53 struct unix_dgram_msg *msgs;
54 char path[1];
57 struct unix_dgram_ctx {
58 int sock;
59 pid_t created_pid;
60 const struct poll_funcs *ev_funcs;
61 size_t max_msg;
63 void (*recv_callback)(struct unix_dgram_ctx *ctx,
64 uint8_t *msg, size_t msg_len,
65 void *private_data);
66 void *private_data;
68 struct poll_watch *sock_read_watch;
69 struct unix_dgram_send_queue *send_queues;
71 struct pthreadpool *send_pool;
72 struct poll_watch *pool_read_watch;
74 uint8_t *recv_buf;
75 char path[1];
78 static ssize_t iov_buflen(const struct iovec *iov, int iovlen);
79 static void unix_dgram_recv_handler(struct poll_watch *w, int fd, short events,
80 void *private_data);
82 /* Set socket non blocking. */
83 static int prepare_socket_nonblock(int sock)
85 int flags;
86 #ifdef O_NONBLOCK
87 #define FLAG_TO_SET O_NONBLOCK
88 #else
89 #ifdef SYSV
90 #define FLAG_TO_SET O_NDELAY
91 #else /* BSD */
92 #define FLAG_TO_SET FNDELAY
93 #endif
94 #endif
96 flags = fcntl(sock, F_GETFL);
97 if (flags == -1) {
98 return errno;
100 flags |= FLAG_TO_SET;
101 if (fcntl(sock, F_SETFL, flags) == -1) {
102 return errno;
105 #undef FLAG_TO_SET
106 return 0;
109 /* Set socket close on exec. */
110 static int prepare_socket_cloexec(int sock)
112 #ifdef FD_CLOEXEC
113 int flags;
115 flags = fcntl(sock, F_GETFD, 0);
116 if (flags == -1) {
117 return errno;
119 flags |= FD_CLOEXEC;
120 if (fcntl(sock, F_SETFD, flags) == -1) {
121 return errno;
123 #endif
124 return 0;
127 /* Set socket non blocking and close on exec. */
128 static int prepare_socket(int sock)
130 int ret = prepare_socket_nonblock(sock);
132 if (ret) {
133 return ret;
135 return prepare_socket_cloexec(sock);
138 static int unix_dgram_init(const struct sockaddr_un *addr, size_t max_msg,
139 const struct poll_funcs *ev_funcs,
140 void (*recv_callback)(struct unix_dgram_ctx *ctx,
141 uint8_t *msg, size_t msg_len,
142 void *private_data),
143 void *private_data,
144 struct unix_dgram_ctx **result)
146 struct unix_dgram_ctx *ctx;
147 size_t pathlen;
148 int ret;
150 if (addr != NULL) {
151 pathlen = strlen(addr->sun_path)+1;
152 } else {
153 pathlen = 1;
156 ctx = malloc(offsetof(struct unix_dgram_ctx, path) + pathlen);
157 if (ctx == NULL) {
158 return ENOMEM;
160 if (addr != NULL) {
161 memcpy(ctx->path, addr->sun_path, pathlen);
162 } else {
163 ctx->path[0] = '\0';
166 ctx->recv_buf = malloc(max_msg);
167 if (ctx->recv_buf == NULL) {
168 free(ctx);
169 return ENOMEM;
171 ctx->max_msg = max_msg;
172 ctx->ev_funcs = ev_funcs;
173 ctx->recv_callback = recv_callback;
174 ctx->private_data = private_data;
175 ctx->sock_read_watch = NULL;
176 ctx->send_pool = NULL;
177 ctx->pool_read_watch = NULL;
178 ctx->send_queues = NULL;
179 ctx->created_pid = (pid_t)-1;
181 ctx->sock = socket(AF_UNIX, SOCK_DGRAM, 0);
182 if (ctx->sock == -1) {
183 ret = errno;
184 goto fail_free;
187 /* Set non-blocking and close-on-exec. */
188 ret = prepare_socket(ctx->sock);
189 if (ret != 0) {
190 goto fail_close;
193 if (addr != NULL) {
194 ret = bind(ctx->sock,
195 (const struct sockaddr *)(const void *)addr,
196 sizeof(*addr));
197 if (ret == -1) {
198 ret = errno;
199 goto fail_close;
202 ctx->created_pid = getpid();
204 ctx->sock_read_watch = ctx->ev_funcs->watch_new(
205 ctx->ev_funcs, ctx->sock, POLLIN,
206 unix_dgram_recv_handler, ctx);
208 if (ctx->sock_read_watch == NULL) {
209 ret = ENOMEM;
210 goto fail_close;
214 *result = ctx;
215 return 0;
217 fail_close:
218 close(ctx->sock);
219 fail_free:
220 free(ctx->recv_buf);
221 free(ctx);
222 return ret;
225 static void unix_dgram_recv_handler(struct poll_watch *w, int fd, short events,
226 void *private_data)
228 struct unix_dgram_ctx *ctx = (struct unix_dgram_ctx *)private_data;
229 ssize_t received;
230 struct msghdr msg;
231 struct iovec iov;
233 iov = (struct iovec) {
234 .iov_base = (void *)ctx->recv_buf,
235 .iov_len = ctx->max_msg,
238 msg = (struct msghdr) {
239 .msg_iov = &iov,
240 .msg_iovlen = 1,
241 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
242 .msg_control = NULL,
243 .msg_controllen = 0,
244 #endif
247 received = recvmsg(fd, &msg, 0);
248 if (received == -1) {
249 if ((errno == EAGAIN) ||
250 #ifdef EWOULDBLOCK
251 (errno == EWOULDBLOCK) ||
252 #endif
253 (errno == EINTR) || (errno == ENOMEM)) {
254 /* Not really an error - just try again. */
255 return;
257 /* Problem with the socket. Set it unreadable. */
258 ctx->ev_funcs->watch_update(w, 0);
259 return;
261 if (received > ctx->max_msg) {
262 /* More than we expected, not for us */
263 return;
265 ctx->recv_callback(ctx, ctx->recv_buf, received, ctx->private_data);
268 static void unix_dgram_job_finished(struct poll_watch *w, int fd, short events,
269 void *private_data);
271 static int unix_dgram_init_pthreadpool(struct unix_dgram_ctx *ctx)
273 int ret, signalfd;
275 if (ctx->send_pool != NULL) {
276 return 0;
279 ret = pthreadpool_init(0, &ctx->send_pool);
280 if (ret != 0) {
281 return ret;
284 signalfd = pthreadpool_signal_fd(ctx->send_pool);
286 ctx->pool_read_watch = ctx->ev_funcs->watch_new(
287 ctx->ev_funcs, signalfd, POLLIN,
288 unix_dgram_job_finished, ctx);
289 if (ctx->pool_read_watch == NULL) {
290 pthreadpool_destroy(ctx->send_pool);
291 ctx->send_pool = NULL;
292 return ENOMEM;
295 return 0;
298 static int unix_dgram_send_queue_init(
299 struct unix_dgram_ctx *ctx, const struct sockaddr_un *dst,
300 struct unix_dgram_send_queue **result)
302 struct unix_dgram_send_queue *q;
303 size_t pathlen;
304 int ret, err;
306 pathlen = strlen(dst->sun_path)+1;
308 q = malloc(offsetof(struct unix_dgram_send_queue, path) + pathlen);
309 if (q == NULL) {
310 return ENOMEM;
312 q->ctx = ctx;
313 q->msgs = NULL;
314 memcpy(q->path, dst->sun_path, pathlen);
316 q->sock = socket(AF_UNIX, SOCK_DGRAM, 0);
317 if (q->sock == -1) {
318 err = errno;
319 goto fail_free;
322 err = prepare_socket_cloexec(q->sock);
323 if (err != 0) {
324 goto fail_close;
327 do {
328 ret = connect(q->sock,
329 (const struct sockaddr *)(const void *)dst,
330 sizeof(*dst));
331 } while ((ret == -1) && (errno == EINTR));
333 if (ret == -1) {
334 err = errno;
335 goto fail_close;
338 err = unix_dgram_init_pthreadpool(ctx);
339 if (err != 0) {
340 goto fail_close;
343 DLIST_ADD(ctx->send_queues, q);
345 *result = q;
346 return 0;
348 fail_close:
349 close(q->sock);
350 fail_free:
351 free(q);
352 return err;
355 static void unix_dgram_send_queue_free(struct unix_dgram_send_queue *q)
357 struct unix_dgram_ctx *ctx = q->ctx;
359 while (q->msgs != NULL) {
360 struct unix_dgram_msg *msg;
361 msg = q->msgs;
362 DLIST_REMOVE(q->msgs, msg);
363 free(msg);
365 close(q->sock);
366 DLIST_REMOVE(ctx->send_queues, q);
367 free(q);
370 static struct unix_dgram_send_queue *find_send_queue(
371 struct unix_dgram_ctx *ctx, const char *dst_sock)
373 struct unix_dgram_send_queue *s;
375 for (s = ctx->send_queues; s != NULL; s = s->next) {
376 if (strcmp(s->path, dst_sock) == 0) {
377 return s;
380 return NULL;
383 static int queue_msg(struct unix_dgram_send_queue *q,
384 const struct iovec *iov, int iovlen)
386 struct unix_dgram_msg *msg;
387 ssize_t buflen;
388 size_t msglen;
389 int i;
391 buflen = iov_buflen(iov, iovlen);
392 if (buflen == -1) {
393 return EINVAL;
396 msglen = offsetof(struct unix_dgram_msg, buf) + buflen;
397 if ((msglen < buflen) ||
398 (msglen < offsetof(struct unix_dgram_msg, buf))) {
399 /* overflow */
400 return EINVAL;
403 msg = malloc(msglen);
404 if (msg == NULL) {
405 return ENOMEM;
407 msg->buflen = buflen;
408 msg->sock = q->sock;
410 buflen = 0;
411 for (i=0; i<iovlen; i++) {
412 memcpy(&msg->buf[buflen], iov[i].iov_base, iov[i].iov_len);
413 buflen += iov[i].iov_len;
416 DLIST_ADD_END(q->msgs, msg, struct unix_dgram_msg);
417 return 0;
420 static void unix_dgram_send_job(void *private_data)
422 struct unix_dgram_msg *msg = private_data;
424 do {
425 msg->sent = send(msg->sock, msg->buf, msg->buflen, 0);
426 } while ((msg->sent == -1) && (errno == EINTR));
429 static void unix_dgram_job_finished(struct poll_watch *w, int fd, short events,
430 void *private_data)
432 struct unix_dgram_ctx *ctx = private_data;
433 struct unix_dgram_send_queue *q;
434 struct unix_dgram_msg *msg;
435 int ret, job;
437 ret = pthreadpool_finished_jobs(ctx->send_pool, &job, 1);
438 if (ret != 1) {
439 return;
442 for (q = ctx->send_queues; q != NULL; q = q->next) {
443 if (job == q->sock) {
444 break;
448 if (q == NULL) {
449 /* Huh? Should not happen */
450 return;
453 msg = q->msgs;
454 DLIST_REMOVE(q->msgs, msg);
455 free(msg);
457 if (q->msgs != NULL) {
458 ret = pthreadpool_add_job(ctx->send_pool, q->sock,
459 unix_dgram_send_job, q->msgs);
460 if (ret == 0) {
461 return;
465 unix_dgram_send_queue_free(q);
468 static int unix_dgram_send(struct unix_dgram_ctx *ctx,
469 const struct sockaddr_un *dst,
470 const struct iovec *iov, int iovlen)
472 struct unix_dgram_send_queue *q;
473 struct msghdr msg;
474 int ret;
477 * To preserve message ordering, we have to queue a message when
478 * others are waiting in line already.
480 q = find_send_queue(ctx, dst->sun_path);
481 if (q != NULL) {
482 return queue_msg(q, iov, iovlen);
486 * Try a cheap nonblocking send
489 msg.msg_name = discard_const_p(struct sockaddr_un, dst);
490 msg.msg_namelen = sizeof(*dst);
491 msg.msg_iov = discard_const_p(struct iovec, iov);
492 msg.msg_iovlen = iovlen;
493 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
494 msg.msg_control = NULL;
495 msg.msg_controllen = 0;
496 #endif
497 msg.msg_flags = 0;
499 ret = sendmsg(ctx->sock, &msg, 0);
500 if (ret >= 0) {
501 return 0;
503 #ifdef EWOULDBLOCK
504 if ((errno != EWOULDBLOCK) && (errno != EAGAIN) && (errno != EINTR)) {
505 #else
506 if ((errno != EAGAIN) && (errno != EINTR)) {
507 #endif
508 return errno;
511 ret = unix_dgram_send_queue_init(ctx, dst, &q);
512 if (ret != 0) {
513 return ret;
515 ret = queue_msg(q, iov, iovlen);
516 if (ret != 0) {
517 unix_dgram_send_queue_free(q);
518 return ret;
520 ret = pthreadpool_add_job(ctx->send_pool, q->sock,
521 unix_dgram_send_job, q->msgs);
522 if (ret != 0) {
523 unix_dgram_send_queue_free(q);
524 return ret;
526 return 0;
529 static int unix_dgram_sock(struct unix_dgram_ctx *ctx)
531 return ctx->sock;
534 static int unix_dgram_free(struct unix_dgram_ctx *ctx)
536 if (ctx->send_queues != NULL) {
537 return EBUSY;
540 if (ctx->send_pool != NULL) {
541 int ret = pthreadpool_destroy(ctx->send_pool);
542 if (ret != 0) {
543 return ret;
545 ctx->ev_funcs->watch_free(ctx->pool_read_watch);
548 ctx->ev_funcs->watch_free(ctx->sock_read_watch);
550 if (getpid() == ctx->created_pid) {
551 /* If we created it, unlink. Otherwise someone else might
552 * still have it open */
553 unlink(ctx->path);
556 close(ctx->sock);
557 free(ctx->recv_buf);
558 free(ctx);
559 return 0;
/*
 * Every message starts with a uint64_t cookie.
 *
 * A value of 0 indicates a single-fragment message which is complete in
 * itself. The data immediately follows the cookie.
 *
 * Every multi-fragment message has a cookie != 0 and starts with a cookie
 * followed by a struct unix_msg_hdr and then the data. The pid and sock
 * fields are used to assure uniqueness on the receiver side.
 */
573 struct unix_msg_hdr {
574 size_t msglen;
575 pid_t pid;
576 int sock;
579 struct unix_msg {
580 struct unix_msg *prev, *next;
581 size_t msglen;
582 size_t received;
583 pid_t sender_pid;
584 int sender_sock;
585 uint64_t cookie;
586 uint8_t buf[1];
589 struct unix_msg_ctx {
590 struct unix_dgram_ctx *dgram;
591 size_t fragment_len;
592 uint64_t cookie;
594 void (*recv_callback)(struct unix_msg_ctx *ctx,
595 uint8_t *msg, size_t msg_len,
596 void *private_data);
597 void *private_data;
599 struct unix_msg *msgs;
602 static void unix_msg_recv(struct unix_dgram_ctx *ctx,
603 uint8_t *msg, size_t msg_len,
604 void *private_data);
606 int unix_msg_init(const char *path, const struct poll_funcs *ev_funcs,
607 size_t fragment_len, uint64_t cookie,
608 void (*recv_callback)(struct unix_msg_ctx *ctx,
609 uint8_t *msg, size_t msg_len,
610 void *private_data),
611 void *private_data,
612 struct unix_msg_ctx **result)
614 struct unix_msg_ctx *ctx;
615 struct sockaddr_un addr;
616 struct sockaddr_un *paddr = NULL;
617 int ret;
619 ctx = malloc(sizeof(*ctx));
620 if (ctx == NULL) {
621 return ENOMEM;
624 if (path != NULL) {
625 size_t pathlen = strlen(path)+1;
627 if (pathlen > sizeof(addr.sun_path)) {
628 return ENAMETOOLONG;
630 addr = (struct sockaddr_un) { .sun_family = AF_UNIX };
631 memcpy(addr.sun_path, path, pathlen);
632 paddr = &addr;
635 ret = unix_dgram_init(paddr, fragment_len, ev_funcs,
636 unix_msg_recv, ctx, &ctx->dgram);
637 if (ret != 0) {
638 free(ctx);
639 return ret;
642 ctx->fragment_len = fragment_len;
643 ctx->cookie = cookie;
644 ctx->recv_callback = recv_callback;
645 ctx->private_data = private_data;
646 ctx->msgs = NULL;
648 *result = ctx;
649 return 0;
652 int unix_msg_send(struct unix_msg_ctx *ctx, const char *dst_sock,
653 const struct iovec *iov, int iovlen)
655 ssize_t msglen;
656 size_t sent;
657 int ret = 0;
658 struct iovec *iov_copy;
659 struct unix_msg_hdr hdr;
660 struct iovec src_iov;
661 struct sockaddr_un dst;
662 size_t dst_len;
664 dst_len = strlen(dst_sock);
665 if (dst_len >= sizeof(dst.sun_path)) {
666 return ENAMETOOLONG;
668 dst = (struct sockaddr_un) { .sun_family = AF_UNIX };
669 memcpy(dst.sun_path, dst_sock, dst_len);
671 if (iovlen < 0) {
672 return EINVAL;
675 msglen = iov_buflen(iov, iovlen);
676 if (msglen == -1) {
677 return EINVAL;
680 if (msglen <= (ctx->fragment_len - sizeof(uint64_t))) {
681 struct iovec tmp_iov[iovlen+1];
682 uint64_t cookie = 0;
684 tmp_iov[0].iov_base = &cookie;
685 tmp_iov[0].iov_len = sizeof(cookie);
686 if (iovlen > 0) {
687 memcpy(&tmp_iov[1], iov,
688 sizeof(struct iovec) * iovlen);
691 return unix_dgram_send(ctx->dgram, &dst, tmp_iov, iovlen+1);
694 hdr.msglen = msglen;
695 hdr.pid = getpid();
696 hdr.sock = unix_dgram_sock(ctx->dgram);
698 iov_copy = malloc(sizeof(struct iovec) * (iovlen + 2));
699 if (iov_copy == NULL) {
700 return ENOMEM;
702 iov_copy[0].iov_base = &ctx->cookie;
703 iov_copy[0].iov_len = sizeof(ctx->cookie);
704 iov_copy[1].iov_base = &hdr;
705 iov_copy[1].iov_len = sizeof(hdr);
707 sent = 0;
708 src_iov = iov[0];
711 * The following write loop sends the user message in pieces. We have
712 * filled the first two iovecs above with "cookie" and "hdr". In the
713 * following loops we pull message chunks from the user iov array and
714 * fill iov_copy piece by piece, possibly truncating chunks from the
715 * caller's iov array. Ugly, but hopefully efficient.
718 while (sent < msglen) {
719 size_t fragment_len;
720 size_t iov_index = 2;
722 fragment_len = sizeof(ctx->cookie) + sizeof(hdr);
724 while (fragment_len < ctx->fragment_len) {
725 size_t space, chunk;
727 space = ctx->fragment_len - fragment_len;
728 chunk = MIN(space, src_iov.iov_len);
730 iov_copy[iov_index].iov_base = src_iov.iov_base;
731 iov_copy[iov_index].iov_len = chunk;
732 iov_index += 1;
734 src_iov.iov_base = (char *)src_iov.iov_base + chunk;
735 src_iov.iov_len -= chunk;
736 fragment_len += chunk;
738 if (src_iov.iov_len == 0) {
739 iov += 1;
740 iovlen -= 1;
741 if (iovlen == 0) {
742 break;
744 src_iov = iov[0];
747 sent += (fragment_len - sizeof(ctx->cookie) - sizeof(hdr));
749 ret = unix_dgram_send(ctx->dgram, &dst, iov_copy, iov_index);
750 if (ret != 0) {
751 break;
755 free(iov_copy);
757 ctx->cookie += 1;
758 if (ctx->cookie == 0) {
759 ctx->cookie += 1;
762 return ret;
765 static void unix_msg_recv(struct unix_dgram_ctx *dgram_ctx,
766 uint8_t *buf, size_t buflen,
767 void *private_data)
769 struct unix_msg_ctx *ctx = (struct unix_msg_ctx *)private_data;
770 struct unix_msg_hdr hdr;
771 struct unix_msg *msg;
772 size_t space;
773 uint64_t cookie;
775 if (buflen < sizeof(cookie)) {
776 return;
778 memcpy(&cookie, buf, sizeof(cookie));
780 buf += sizeof(cookie);
781 buflen -= sizeof(cookie);
783 if (cookie == 0) {
784 ctx->recv_callback(ctx, buf, buflen, ctx->private_data);
785 return;
788 if (buflen < sizeof(hdr)) {
789 return;
791 memcpy(&hdr, buf, sizeof(hdr));
793 buf += sizeof(hdr);
794 buflen -= sizeof(hdr);
796 for (msg = ctx->msgs; msg != NULL; msg = msg->next) {
797 if ((msg->sender_pid == hdr.pid) &&
798 (msg->sender_sock == hdr.sock)) {
799 break;
803 if ((msg != NULL) && (msg->cookie != cookie)) {
804 DLIST_REMOVE(ctx->msgs, msg);
805 free(msg);
806 msg = NULL;
809 if (msg == NULL) {
810 msg = malloc(offsetof(struct unix_msg, buf) + hdr.msglen);
811 if (msg == NULL) {
812 return;
814 msg->msglen = hdr.msglen;
815 msg->received = 0;
816 msg->sender_pid = hdr.pid;
817 msg->sender_sock = hdr.sock;
818 msg->cookie = cookie;
819 DLIST_ADD(ctx->msgs, msg);
822 space = msg->msglen - msg->received;
823 if (buflen > space) {
824 return;
827 memcpy(msg->buf + msg->received, buf, buflen);
828 msg->received += buflen;
830 if (msg->received < msg->msglen) {
831 return;
834 DLIST_REMOVE(ctx->msgs, msg);
835 ctx->recv_callback(ctx, msg->buf, msg->msglen, ctx->private_data);
836 free(msg);
839 int unix_msg_free(struct unix_msg_ctx *ctx)
841 int ret;
843 ret = unix_dgram_free(ctx->dgram);
844 if (ret != 0) {
845 return ret;
848 while (ctx->msgs != NULL) {
849 struct unix_msg *msg = ctx->msgs;
850 DLIST_REMOVE(ctx->msgs, msg);
851 free(msg);
854 free(ctx);
855 return 0;
858 static ssize_t iov_buflen(const struct iovec *iov, int iovlen)
860 size_t buflen = 0;
861 int i;
863 for (i=0; i<iovlen; i++) {
864 size_t thislen = iov[i].iov_len;
865 size_t tmp = buflen + thislen;
867 if ((tmp < buflen) || (tmp < thislen)) {
868 /* overflow */
869 return -1;
871 buflen = tmp;
873 return buflen;