s3:messaging: change unix_dgram_recv_handler() to use recvmsg, not recv
[Samba.git] / source3 / lib / unix_msg / unix_msg.c
blobbcabd28da284304bc9e1724acbf247099cc95525
1 /*
2 * Unix SMB/CIFS implementation.
3 * Copyright (C) Volker Lendecke 2013
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
19 #include "replace.h"
20 #include "unix_msg.h"
21 #include "system/select.h"
22 #include "system/time.h"
23 #include "system/network.h"
24 #include "dlinklist.h"
25 #include "pthreadpool/pthreadpool.h"
26 #include <fcntl.h>
29 * This file implements two abstractions: The "unix_dgram" functions implement
30 * queueing for unix domain datagram sockets. You can send to a destination
31 * socket, and if that has no free space available, it will fall back to an
32 * anonymous socket that will poll for writability. "unix_dgram" expects the
33 * data size not to exceed the system limit.
35 * The "unix_msg" functions implement the fragmentation of large messages on
36 * top of "unix_dgram". This is what is exposed to the user of this API.
/*
 * One datagram queued for a blocking send performed by a pthreadpool job.
 * The payload is allocated inline after the struct via
 * offsetof(struct unix_dgram_msg, buf).
 */
struct unix_dgram_msg {
	struct unix_dgram_msg *prev, *next;	/* send-queue linkage */

	int sock;		/* connected destination socket fd */
	ssize_t sent;		/* result of send(); -1 on failure */
	int sys_errno;		/* presumably the errno of a failed send —
				 * never assigned in this file; verify */
	size_t buflen;		/* number of valid bytes in buf */
	uint8_t buf[1];		/* actually buflen bytes, allocated inline */
};
/*
 * Per-destination FIFO of pending messages, keyed by the destination
 * socket path. Holds a socket connect()ed to that path which the
 * pthreadpool send job uses for blocking sends.
 */
struct unix_dgram_send_queue {
	struct unix_dgram_send_queue *prev, *next;	/* ctx->send_queues */
	struct unix_dgram_ctx *ctx;	/* owning context */
	int sock;			/* connected to "path" */
	struct unix_dgram_msg *msgs;	/* pending messages, head in flight */
	char path[1];			/* destination path, allocated inline */
};
/*
 * State for one AF_UNIX datagram socket: the receive machinery plus the
 * per-destination send queues used when a receiver's socket is full.
 */
struct unix_dgram_ctx {
	int sock;		/* nonblocking AF_UNIX SOCK_DGRAM socket */
	pid_t created_pid;	/* pid that bound "path"; only the creator
				 * unlinks it on free */
	const struct poll_funcs *ev_funcs;	/* event loop abstraction */
	size_t max_msg;		/* size of recv_buf, i.e. largest datagram */

	void (*recv_callback)(struct unix_dgram_ctx *ctx,
			      uint8_t *msg, size_t msg_len,
			      void *private_data);
	void *private_data;

	struct poll_watch *sock_read_watch;	/* POLLIN watch on sock */
	struct unix_dgram_send_queue *send_queues;

	struct pthreadpool *send_pool;	/* lazily created for blocking sends */
	struct poll_watch *pool_read_watch;	/* watch on pool signal fd */

	uint8_t *recv_buf;	/* max_msg bytes, reused for every recv */
	char path[1];		/* our socket path, allocated inline */
};
/* Sum of iov_len over iovlen iovecs; -1 on size_t overflow. */
static ssize_t iov_buflen(const struct iovec *iov, int iovlen);
static void unix_dgram_recv_handler(struct poll_watch *w, int fd, short events,
				    void *private_data);
/* Switch a descriptor into non-blocking mode. Returns 0 or an errno. */
static int prepare_socket_nonblock(int sock)
{
	int fl;
#ifdef O_NONBLOCK
#define FLAG_TO_SET O_NONBLOCK
#else
#ifdef SYSV
#define FLAG_TO_SET O_NDELAY
#else /* BSD */
#define FLAG_TO_SET FNDELAY
#endif
#endif

	fl = fcntl(sock, F_GETFL);
	if (fl == -1) {
		return errno;
	}
	if (fcntl(sock, F_SETFL, fl | FLAG_TO_SET) == -1) {
		return errno;
	}
#undef FLAG_TO_SET
	return 0;
}
/* Mark a descriptor close-on-exec where supported. Returns 0 or an errno. */
static int prepare_socket_cloexec(int sock)
{
#ifdef FD_CLOEXEC
	int fdflags;

	fdflags = fcntl(sock, F_GETFD, 0);
	if (fdflags == -1) {
		return errno;
	}
	if (fcntl(sock, F_SETFD, fdflags | FD_CLOEXEC) == -1) {
		return errno;
	}
#endif
	return 0;
}
/* Set socket non blocking and close on exec. Returns 0 or an errno. */
static int prepare_socket(int sock)
{
	int err;

	err = prepare_socket_nonblock(sock);
	if (err != 0) {
		return err;
	}
	return prepare_socket_cloexec(sock);
}
/*
 * Create a unix_dgram_ctx: allocate the context and receive buffer,
 * create a nonblocking close-on-exec AF_UNIX datagram socket, bind it to
 * "path" (if given) and register a POLLIN watch that delivers incoming
 * datagrams to recv_callback.
 *
 * path==NULL creates an unbound (send-only) context.
 * Returns 0 or an errno; on success *result owns the new context.
 */
static int unix_dgram_init(const char *path, size_t max_msg,
			   const struct poll_funcs *ev_funcs,
			   void (*recv_callback)(struct unix_dgram_ctx *ctx,
						 uint8_t *msg, size_t msg_len,
						 void *private_data),
			   void *private_data,
			   struct unix_dgram_ctx **result)
{
	struct unix_dgram_ctx *ctx;
	struct sockaddr_un addr = { 0, };
	size_t pathlen;
	int ret;

	if (path != NULL) {
		pathlen = strlen(path)+1;	/* including the NUL */
		if (pathlen > sizeof(addr.sun_path)) {
			return ENAMETOOLONG;
		}
	} else {
		pathlen = 1;			/* just an empty string */
	}

	/* ctx->path is a variable-length trailing array */
	ctx = malloc(offsetof(struct unix_dgram_ctx, path) + pathlen);
	if (ctx == NULL) {
		return ENOMEM;
	}
	if (path != NULL) {
		memcpy(ctx->path, path, pathlen);
	} else {
		ctx->path[0] = '\0';
	}

	ctx->recv_buf = malloc(max_msg);
	if (ctx->recv_buf == NULL) {
		free(ctx);
		return ENOMEM;
	}
	ctx->max_msg = max_msg;
	ctx->ev_funcs = ev_funcs;
	ctx->recv_callback = recv_callback;
	ctx->private_data = private_data;
	ctx->sock_read_watch = NULL;
	ctx->send_pool = NULL;
	ctx->pool_read_watch = NULL;
	ctx->send_queues = NULL;
	ctx->created_pid = (pid_t)-1;

	ctx->sock = socket(AF_UNIX, SOCK_DGRAM, 0);
	if (ctx->sock == -1) {
		ret = errno;
		goto fail_free;
	}

	/* Set non-blocking and close-on-exec. */
	ret = prepare_socket(ctx->sock);
	if (ret != 0) {
		goto fail_close;
	}

	if (path != NULL) {
		addr.sun_family = AF_UNIX;
		memcpy(addr.sun_path, path, pathlen);

		ret = bind(ctx->sock, (struct sockaddr *)(void *)&addr,
			   sizeof(addr));
		if (ret == -1) {
			ret = errno;
			goto fail_close;
		}
	}

	/* Remember who created the socket; only the creator unlinks
	 * the path in unix_dgram_free(). */
	ctx->created_pid = getpid();

	ctx->sock_read_watch = ctx->ev_funcs->watch_new(
		ctx->ev_funcs, ctx->sock, POLLIN,
		unix_dgram_recv_handler, ctx);

	if (ctx->sock_read_watch == NULL) {
		ret = ENOMEM;
		goto fail_close;
	}

	*result = ctx;
	return 0;

fail_close:
	close(ctx->sock);
fail_free:
	free(ctx->recv_buf);
	free(ctx);
	return ret;
}
231 static void unix_dgram_recv_handler(struct poll_watch *w, int fd, short events,
232 void *private_data)
234 struct unix_dgram_ctx *ctx = (struct unix_dgram_ctx *)private_data;
235 ssize_t received;
236 struct msghdr msg;
237 struct iovec iov;
239 iov = (struct iovec) {
240 .iov_base = (void *)ctx->recv_buf,
241 .iov_len = ctx->max_msg,
244 msg = (struct msghdr) {
245 .msg_iov = &iov,
246 .msg_iovlen = 1,
247 .msg_control = NULL,
248 .msg_controllen = 0,
251 received = recvmsg(fd, &msg, 0);
252 if (received == -1) {
253 if ((errno == EAGAIN) ||
254 #ifdef EWOULDBLOCK
255 (errno == EWOULDBLOCK) ||
256 #endif
257 (errno == EINTR) || (errno == ENOMEM)) {
258 /* Not really an error - just try again. */
259 return;
261 /* Problem with the socket. Set it unreadable. */
262 ctx->ev_funcs->watch_update(w, 0);
263 return;
265 if (received > ctx->max_msg) {
266 /* More than we expected, not for us */
267 return;
269 ctx->recv_callback(ctx, ctx->recv_buf, received, ctx->private_data);
static void unix_dgram_job_finished(struct poll_watch *w, int fd, short events,
				    void *private_data);

/*
 * Lazily create the pthreadpool used for blocking sends, plus a POLLIN
 * watch on the pool's signal fd so that finished jobs are collected from
 * the main event loop. Idempotent: returns 0 if the pool already exists.
 * Returns 0 or an error code.
 */
static int unix_dgram_init_pthreadpool(struct unix_dgram_ctx *ctx)
{
	int ret, signalfd;

	if (ctx->send_pool != NULL) {
		return 0;
	}

	/* max_threads==0 — presumably "no limit"; see pthreadpool.h */
	ret = pthreadpool_init(0, &ctx->send_pool);
	if (ret != 0) {
		return ret;
	}

	signalfd = pthreadpool_signal_fd(ctx->send_pool);

	ctx->pool_read_watch = ctx->ev_funcs->watch_new(
		ctx->ev_funcs, signalfd, POLLIN,
		unix_dgram_job_finished, ctx);
	if (ctx->pool_read_watch == NULL) {
		pthreadpool_destroy(ctx->send_pool);
		ctx->send_pool = NULL;
		return ENOMEM;
	}

	return 0;
}
302 static int unix_dgram_send_queue_init(
303 struct unix_dgram_ctx *ctx, const char *path,
304 struct unix_dgram_send_queue **result)
306 struct unix_dgram_send_queue *q;
307 struct sockaddr_un addr = { 0, };
308 size_t pathlen;
309 int ret, err;
311 pathlen = strlen(path)+1;
313 if (pathlen > sizeof(addr.sun_path)) {
314 return ENAMETOOLONG;
317 q = malloc(offsetof(struct unix_dgram_send_queue, path) + pathlen);
318 if (q == NULL) {
319 return ENOMEM;
321 q->ctx = ctx;
322 q->msgs = NULL;
323 memcpy(q->path, path, pathlen);
325 q->sock = socket(AF_UNIX, SOCK_DGRAM, 0);
326 if (q->sock == -1) {
327 err = errno;
328 goto fail_free;
331 err = prepare_socket_cloexec(q->sock);
332 if (err != 0) {
333 goto fail_close;
336 addr.sun_family = AF_UNIX;
337 memcpy(addr.sun_path, path, pathlen+1);
339 do {
340 ret = connect(q->sock, (struct sockaddr *)&addr, sizeof(addr));
341 } while ((ret == -1) && (errno == EINTR));
343 if (ret == -1) {
344 err = errno;
345 goto fail_close;
348 err = unix_dgram_init_pthreadpool(ctx);
349 if (err != 0) {
350 goto fail_close;
353 DLIST_ADD(ctx->send_queues, q);
355 *result = q;
356 return 0;
358 fail_close:
359 close(q->sock);
360 fail_free:
361 free(q);
362 return err;
365 static void unix_dgram_send_queue_free(struct unix_dgram_send_queue *q)
367 struct unix_dgram_ctx *ctx = q->ctx;
369 while (q->msgs != NULL) {
370 struct unix_dgram_msg *msg;
371 msg = q->msgs;
372 DLIST_REMOVE(q->msgs, msg);
373 free(msg);
375 close(q->sock);
376 DLIST_REMOVE(ctx->send_queues, q);
377 free(q);
380 static struct unix_dgram_send_queue *find_send_queue(
381 struct unix_dgram_ctx *ctx, const char *dst_sock)
383 struct unix_dgram_send_queue *s;
385 for (s = ctx->send_queues; s != NULL; s = s->next) {
386 if (strcmp(s->path, dst_sock) == 0) {
387 return s;
390 return NULL;
393 static int queue_msg(struct unix_dgram_send_queue *q,
394 const struct iovec *iov, int iovlen)
396 struct unix_dgram_msg *msg;
397 ssize_t buflen;
398 size_t msglen;
399 int i;
401 buflen = iov_buflen(iov, iovlen);
402 if (buflen == -1) {
403 return EINVAL;
406 msglen = offsetof(struct unix_dgram_msg, buf) + buflen;
407 if ((msglen < buflen) ||
408 (msglen < offsetof(struct unix_dgram_msg, buf))) {
409 /* overflow */
410 return EINVAL;
413 msg = malloc(msglen);
414 if (msg == NULL) {
415 return ENOMEM;
417 msg->buflen = buflen;
418 msg->sock = q->sock;
420 buflen = 0;
421 for (i=0; i<iovlen; i++) {
422 memcpy(&msg->buf[buflen], iov[i].iov_base, iov[i].iov_len);
423 buflen += iov[i].iov_len;
426 DLIST_ADD_END(q->msgs, msg, struct unix_dgram_msg);
427 return 0;
430 static void unix_dgram_send_job(void *private_data)
432 struct unix_dgram_msg *msg = private_data;
434 do {
435 msg->sent = send(msg->sock, msg->buf, msg->buflen, 0);
436 } while ((msg->sent == -1) && (errno == EINTR));
/*
 * Runs in the main thread when a pthreadpool send job completed. Pops
 * the finished head message from its queue, schedules the next queued
 * message (if any), and frees the queue once it drains or a new job
 * cannot be scheduled.
 */
static void unix_dgram_job_finished(struct poll_watch *w, int fd, short events,
				    void *private_data)
{
	struct unix_dgram_ctx *ctx = private_data;
	struct unix_dgram_send_queue *q;
	struct unix_dgram_msg *msg;
	int ret, job;

	ret = pthreadpool_finished_jobs(ctx->send_pool, &job, 1);
	if (ret != 1) {
		return;
	}

	/* Job ids are the queues' socket fds; find the matching queue. */
	for (q = ctx->send_queues; q != NULL; q = q->next) {
		if (job == q->sock) {
			break;
		}
	}

	if (q == NULL) {
		/* Huh? Should not happen */
		return;
	}

	/* The head of the queue is the message the job just sent. */
	msg = q->msgs;
	DLIST_REMOVE(q->msgs, msg);
	free(msg);

	if (q->msgs != NULL) {
		ret = pthreadpool_add_job(ctx->send_pool, q->sock,
					  unix_dgram_send_job, q->msgs);
		if (ret == 0) {
			return;
		}
		/* Could not schedule the next send: fall through and
		 * drop the whole queue. */
	}

	unix_dgram_send_queue_free(q);
}
/*
 * Send iov to the socket bound at path dst_sock. Fast path: if no queue
 * exists for the destination yet, try a nonblocking sendmsg() on our own
 * socket. If the receiver is full, fall back to a per-destination queue
 * whose messages a pthreadpool job sends blockingly, one at a time, to
 * preserve ordering. Returns 0 or an errno.
 */
static int unix_dgram_send(struct unix_dgram_ctx *ctx, const char *dst_sock,
			   const struct iovec *iov, int iovlen)
{
	struct unix_dgram_send_queue *q;
	struct sockaddr_un addr = { 0, };
	struct msghdr msg;
	size_t dst_len;
	int ret;

	dst_len = strlen(dst_sock);
	if (dst_len >= sizeof(addr.sun_path)) {
		return ENAMETOOLONG;
	}

	/*
	 * To preserve message ordering, we have to queue a message when
	 * others are waiting in line already.
	 */
	q = find_send_queue(ctx, dst_sock);
	if (q != NULL) {
		return queue_msg(q, iov, iovlen);
	}

	/*
	 * Try a cheap nonblocking send
	 */

	addr.sun_family = AF_UNIX;
	memcpy(addr.sun_path, dst_sock, dst_len);

	msg.msg_name = &addr;
	msg.msg_namelen = sizeof(addr);
	msg.msg_iov = discard_const_p(struct iovec, iov);
	msg.msg_iovlen = iovlen;
	msg.msg_control = NULL;
	msg.msg_controllen = 0;
	msg.msg_flags = 0;

	ret = sendmsg(ctx->sock, &msg, 0);
	if (ret >= 0) {
		return 0;
	}
	/* Anything but "would block"/"interrupted" is a hard error. */
#ifdef EWOULDBLOCK
	if ((errno != EWOULDBLOCK) && (errno != EAGAIN) && (errno != EINTR)) {
#else
	if ((errno != EAGAIN) && (errno != EINTR)) {
#endif
		return errno;
	}

	/* Receiver is full: set up a queue and hand the first message to
	 * a blocking sender thread. */
	ret = unix_dgram_send_queue_init(ctx, dst_sock, &q);
	if (ret != 0) {
		return ret;
	}
	ret = queue_msg(q, iov, iovlen);
	if (ret != 0) {
		unix_dgram_send_queue_free(q);
		return ret;
	}
	ret = pthreadpool_add_job(ctx->send_pool, q->sock,
				  unix_dgram_send_job, q->msgs);
	if (ret != 0) {
		unix_dgram_send_queue_free(q);
		return ret;
	}
	return 0;
}
/* Return the context's socket fd (used as a sender id in unix_msg_send). */
static int unix_dgram_sock(struct unix_dgram_ctx *ctx)
{
	return ctx->sock;
}
/*
 * Tear down a unix_dgram_ctx. Refuses with EBUSY while send queues are
 * still pending. Unlinks the socket path only if this process created
 * it. Returns 0, EBUSY, or an error from pthreadpool_destroy().
 */
static int unix_dgram_free(struct unix_dgram_ctx *ctx)
{
	if (ctx->send_queues != NULL) {
		return EBUSY;
	}

	if (ctx->send_pool != NULL) {
		int ret = pthreadpool_destroy(ctx->send_pool);
		if (ret != 0) {
			return ret;
		}
		ctx->ev_funcs->watch_free(ctx->pool_read_watch);
	}

	ctx->ev_funcs->watch_free(ctx->sock_read_watch);

	if (getpid() == ctx->created_pid) {
		/* If we created it, unlink. Otherwise someone else might
		 * still have it open */
		unlink(ctx->path);
	}

	close(ctx->sock);
	free(ctx->recv_buf);
	free(ctx);
	return 0;
}
580 * Every message starts with a uint64_t cookie.
582 * A value of 0 indicates a single-fragment message which is complete in
583 * itself. The data immediately follows the cookie.
585 * Every multi-fragment message has a cookie != 0 and starts with a cookie
586 * followed by a struct unix_msg_header and then the data. The pid and sock
587 * fields are used to assure uniqueness on the receiver side.
/*
 * Per-fragment header of a multi-fragment message, following the cookie.
 * Copied raw (host representation) — both ends are on the same machine.
 * pid and sock together identify the sender uniquely on the receiver.
 */
struct unix_msg_hdr {
	size_t msglen;		/* total length of the assembled message */
	pid_t pid;		/* sender's pid */
	int sock;		/* sender's datagram socket fd */
};
/* Reassembly state for one in-flight multi-fragment message. */
struct unix_msg {
	struct unix_msg *prev, *next;	/* ctx->msgs linkage */
	size_t msglen;		/* expected total length (from the header) */
	size_t received;	/* bytes accumulated so far */
	pid_t sender_pid;	/* copied from the fragment header */
	int sender_sock;	/* copied from the fragment header */
	uint64_t cookie;	/* sender's cookie; a different cookie from
				 * the same sender discards this message */
	uint8_t buf[1];		/* msglen bytes, allocated inline */
};
/* Public context: fragmentation/reassembly layered on unix_dgram. */
struct unix_msg_ctx {
	struct unix_dgram_ctx *dgram;	/* underlying datagram transport */
	size_t fragment_len;	/* max datagram size, incl. cookie+header */
	uint64_t cookie;	/* nonzero label for this sender's fragments,
				 * bumped after every multi-fragment send */

	void (*recv_callback)(struct unix_msg_ctx *ctx,
			      uint8_t *msg, size_t msg_len,
			      void *private_data);
	void *private_data;

	struct unix_msg *msgs;	/* partially reassembled incoming messages */
};

static void unix_msg_recv(struct unix_dgram_ctx *ctx,
			  uint8_t *msg, size_t msg_len,
			  void *private_data);
623 int unix_msg_init(const char *path, const struct poll_funcs *ev_funcs,
624 size_t fragment_len, uint64_t cookie,
625 void (*recv_callback)(struct unix_msg_ctx *ctx,
626 uint8_t *msg, size_t msg_len,
627 void *private_data),
628 void *private_data,
629 struct unix_msg_ctx **result)
631 struct unix_msg_ctx *ctx;
632 int ret;
634 ctx = malloc(sizeof(*ctx));
635 if (ctx == NULL) {
636 return ENOMEM;
639 ret = unix_dgram_init(path, fragment_len, ev_funcs,
640 unix_msg_recv, ctx, &ctx->dgram);
641 if (ret != 0) {
642 free(ctx);
643 return ret;
646 ctx->fragment_len = fragment_len;
647 ctx->cookie = cookie;
648 ctx->recv_callback = recv_callback;
649 ctx->private_data = private_data;
650 ctx->msgs = NULL;
652 *result = ctx;
653 return 0;
/*
 * Send iov to dst_sock, fragmenting if it does not fit into one
 * datagram. Small messages go out as a single datagram prefixed with
 * cookie 0; large ones as a sequence of fragments each carrying
 * (cookie, unix_msg_hdr) for reassembly on the receiver.
 * Returns 0 or an errno.
 */
int unix_msg_send(struct unix_msg_ctx *ctx, const char *dst_sock,
		  const struct iovec *iov, int iovlen)
{
	ssize_t msglen;
	size_t sent;
	int ret = 0;
	struct iovec *iov_copy;
	struct unix_msg_hdr hdr;
	struct iovec src_iov;

	if (iovlen < 0) {
		return EINVAL;
	}

	msglen = iov_buflen(iov, iovlen);
	if (msglen == -1) {
		return EINVAL;
	}

	/* NOTE(review): assumes fragment_len >= sizeof(uint64_t);
	 * otherwise the subtraction wraps — confirm with callers. */
	if (msglen <= (ctx->fragment_len - sizeof(uint64_t))) {
		/* Fits in one datagram: cookie 0 means "complete". */
		struct iovec tmp_iov[iovlen+1];
		uint64_t cookie = 0;

		tmp_iov[0].iov_base = &cookie;
		tmp_iov[0].iov_len = sizeof(cookie);
		if (iovlen > 0) {
			memcpy(&tmp_iov[1], iov,
			       sizeof(struct iovec) * iovlen);
		}

		return unix_dgram_send(ctx->dgram, dst_sock, tmp_iov,
				       iovlen+1);
	}

	hdr.msglen = msglen;
	hdr.pid = getpid();
	hdr.sock = unix_dgram_sock(ctx->dgram);

	/* Slots 0 and 1 stay cookie and hdr; the rest carry payload. */
	iov_copy = malloc(sizeof(struct iovec) * (iovlen + 2));
	if (iov_copy == NULL) {
		return ENOMEM;
	}
	iov_copy[0].iov_base = &ctx->cookie;
	iov_copy[0].iov_len = sizeof(ctx->cookie);
	iov_copy[1].iov_base = &hdr;
	iov_copy[1].iov_len = sizeof(hdr);

	sent = 0;
	src_iov = iov[0];

	/*
	 * The following write loop sends the user message in pieces. We have
	 * filled the first two iovecs above with "cookie" and "hdr". In the
	 * following loops we pull message chunks from the user iov array and
	 * fill iov_copy piece by piece, possibly truncating chunks from the
	 * caller's iov array. Ugly, but hopefully efficient.
	 */

	while (sent < msglen) {
		size_t fragment_len;
		size_t iov_index = 2;

		fragment_len = sizeof(ctx->cookie) + sizeof(hdr);

		while (fragment_len < ctx->fragment_len) {
			size_t space, chunk;

			space = ctx->fragment_len - fragment_len;
			chunk = MIN(space, src_iov.iov_len);

			iov_copy[iov_index].iov_base = src_iov.iov_base;
			iov_copy[iov_index].iov_len = chunk;
			iov_index += 1;

			src_iov.iov_base = (char *)src_iov.iov_base + chunk;
			src_iov.iov_len -= chunk;
			fragment_len += chunk;

			if (src_iov.iov_len == 0) {
				/* advance to the caller's next iovec */
				iov += 1;
				iovlen -= 1;
				if (iovlen == 0) {
					break;
				}
				src_iov = iov[0];
			}
		}

		/* payload bytes in this fragment, excluding cookie+hdr */
		sent += (fragment_len - sizeof(ctx->cookie) - sizeof(hdr));

		ret = unix_dgram_send(ctx->dgram, dst_sock,
				      iov_copy, iov_index);
		if (ret != 0) {
			break;
		}
	}

	free(iov_copy);

	/* Next multi-fragment message gets a fresh, nonzero cookie. */
	ctx->cookie += 1;
	if (ctx->cookie == 0) {
		ctx->cookie += 1;
	}

	return ret;
}
762 static void unix_msg_recv(struct unix_dgram_ctx *dgram_ctx,
763 uint8_t *buf, size_t buflen,
764 void *private_data)
766 struct unix_msg_ctx *ctx = (struct unix_msg_ctx *)private_data;
767 struct unix_msg_hdr hdr;
768 struct unix_msg *msg;
769 size_t space;
770 uint64_t cookie;
772 if (buflen < sizeof(cookie)) {
773 return;
775 memcpy(&cookie, buf, sizeof(cookie));
777 buf += sizeof(cookie);
778 buflen -= sizeof(cookie);
780 if (cookie == 0) {
781 ctx->recv_callback(ctx, buf, buflen, ctx->private_data);
782 return;
785 if (buflen < sizeof(hdr)) {
786 return;
788 memcpy(&hdr, buf, sizeof(hdr));
790 buf += sizeof(hdr);
791 buflen -= sizeof(hdr);
793 for (msg = ctx->msgs; msg != NULL; msg = msg->next) {
794 if ((msg->sender_pid == hdr.pid) &&
795 (msg->sender_sock == hdr.sock)) {
796 break;
800 if ((msg != NULL) && (msg->cookie != cookie)) {
801 DLIST_REMOVE(ctx->msgs, msg);
802 free(msg);
803 msg = NULL;
806 if (msg == NULL) {
807 msg = malloc(offsetof(struct unix_msg, buf) + hdr.msglen);
808 if (msg == NULL) {
809 return;
811 msg->msglen = hdr.msglen;
812 msg->received = 0;
813 msg->sender_pid = hdr.pid;
814 msg->sender_sock = hdr.sock;
815 msg->cookie = cookie;
816 DLIST_ADD(ctx->msgs, msg);
819 space = msg->msglen - msg->received;
820 if (buflen > space) {
821 return;
824 memcpy(msg->buf + msg->received, buf, buflen);
825 msg->received += buflen;
827 if (msg->received < msg->msglen) {
828 return;
831 DLIST_REMOVE(ctx->msgs, msg);
832 ctx->recv_callback(ctx, msg->buf, msg->msglen, ctx->private_data);
833 free(msg);
836 int unix_msg_free(struct unix_msg_ctx *ctx)
838 int ret;
840 ret = unix_dgram_free(ctx->dgram);
841 if (ret != 0) {
842 return ret;
845 while (ctx->msgs != NULL) {
846 struct unix_msg *msg = ctx->msgs;
847 DLIST_REMOVE(ctx->msgs, msg);
848 free(msg);
851 free(ctx);
852 return 0;
/* Total payload bytes described by the iov array, or -1 on overflow. */
static ssize_t iov_buflen(const struct iovec *iov, int iovlen)
{
	size_t total = 0;
	int idx;

	for (idx = 0; idx < iovlen; idx++) {
		size_t next = total + iov[idx].iov_len;

		if ((next < total) || (next < iov[idx].iov_len)) {
			/* size_t wraparound: overflow */
			return -1;
		}
		total = next;
	}
	return total;
}