2 * Unix SMB/CIFS implementation.
3 * Copyright (C) Volker Lendecke 2013
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
21 #include "system/select.h"
22 #include "system/time.h"
23 #include "system/network.h"
24 #include "dlinklist.h"
25 #include "pthreadpool/pthreadpool.h"
26 #include "lib/util/iov_buf.h"
27 #include "lib/msghdr.h"
31 * This file implements two abstractions: The "unix_dgram" functions implement
32 * queueing for unix domain datagram sockets. You can send to a destination
33 * socket, and if that has no free space available, it will fall back to an
34 * anonymous socket that will poll for writability. "unix_dgram" expects the
35 * data size not to exceed the system limit.
37 * The "unix_msg" functions implement the fragmentation of large messages on
38 * top of "unix_dgram". This is what is exposed to the user of this API.
41 struct unix_dgram_msg
{
42 struct unix_dgram_msg
*prev
, *next
;
49 struct unix_dgram_send_queue
{
50 struct unix_dgram_send_queue
*prev
, *next
;
51 struct unix_dgram_ctx
*ctx
;
53 struct unix_dgram_msg
*msgs
;
57 struct unix_dgram_ctx
{
60 const struct poll_funcs
*ev_funcs
;
63 void (*recv_callback
)(struct unix_dgram_ctx
*ctx
,
64 uint8_t *msg
, size_t msg_len
,
65 int *fds
, size_t num_fds
,
69 struct poll_watch
*sock_read_watch
;
70 struct unix_dgram_send_queue
*send_queues
;
72 struct pthreadpool
*send_pool
;
73 struct poll_watch
*pool_read_watch
;
79 static void unix_dgram_recv_handler(struct poll_watch
*w
, int fd
, short events
,
82 /* Set socket non blocking. */
83 static int prepare_socket_nonblock(int sock
)
87 #define FLAG_TO_SET O_NONBLOCK
90 #define FLAG_TO_SET O_NDELAY
92 #define FLAG_TO_SET FNDELAY
96 flags
= fcntl(sock
, F_GETFL
);
100 flags
|= FLAG_TO_SET
;
101 if (fcntl(sock
, F_SETFL
, flags
) == -1) {
109 /* Set socket close on exec. */
110 static int prepare_socket_cloexec(int sock
)
115 flags
= fcntl(sock
, F_GETFD
, 0);
120 if (fcntl(sock
, F_SETFD
, flags
) == -1) {
127 /* Set socket non blocking and close on exec. */
128 static int prepare_socket(int sock
)
130 int ret
= prepare_socket_nonblock(sock
);
135 return prepare_socket_cloexec(sock
);
138 static size_t unix_dgram_msg_size(void)
140 size_t msgsize
= sizeof(struct unix_dgram_msg
);
141 msgsize
= (msgsize
+ 15) & ~15; /* align to 16 */
145 static struct msghdr_buf
*unix_dgram_msghdr(struct unix_dgram_msg
*msg
)
148 * Not portable in C99, but "msg" is aligned and so is
149 * unix_dgram_msg_size()
151 return (struct msghdr_buf
*)(((char *)msg
) + unix_dgram_msg_size());
154 static void close_fd_array(int *fds
, size_t num_fds
)
158 for (i
= 0; i
< num_fds
; i
++) {
168 static void close_fd_array_dgram_msg(struct unix_dgram_msg
*dmsg
)
170 struct msghdr_buf
*hdr
= unix_dgram_msghdr(dmsg
);
171 struct msghdr
*msg
= msghdr_buf_msghdr(hdr
);
172 size_t num_fds
= msghdr_extract_fds(msg
, NULL
, 0);
175 msghdr_extract_fds(msg
, fds
, num_fds
);
177 close_fd_array(fds
, num_fds
);
180 static int unix_dgram_init(const struct sockaddr_un
*addr
, size_t max_msg
,
181 const struct poll_funcs
*ev_funcs
,
182 void (*recv_callback
)(struct unix_dgram_ctx
*ctx
,
183 uint8_t *msg
, size_t msg_len
,
184 int *fds
, size_t num_fds
,
187 struct unix_dgram_ctx
**result
)
189 struct unix_dgram_ctx
*ctx
;
194 pathlen
= strlen(addr
->sun_path
)+1;
199 ctx
= malloc(offsetof(struct unix_dgram_ctx
, path
) + pathlen
);
204 memcpy(ctx
->path
, addr
->sun_path
, pathlen
);
209 *ctx
= (struct unix_dgram_ctx
) {
211 .ev_funcs
= ev_funcs
,
212 .recv_callback
= recv_callback
,
213 .private_data
= private_data
,
214 .created_pid
= (pid_t
)-1
217 ctx
->recv_buf
= malloc(max_msg
);
218 if (ctx
->recv_buf
== NULL
) {
223 ctx
->sock
= socket(AF_UNIX
, SOCK_DGRAM
, 0);
224 if (ctx
->sock
== -1) {
229 /* Set non-blocking and close-on-exec. */
230 ret
= prepare_socket(ctx
->sock
);
236 ret
= bind(ctx
->sock
,
237 (const struct sockaddr
*)(const void *)addr
,
244 ctx
->created_pid
= getpid();
246 ctx
->sock_read_watch
= ctx
->ev_funcs
->watch_new(
247 ctx
->ev_funcs
, ctx
->sock
, POLLIN
,
248 unix_dgram_recv_handler
, ctx
);
250 if (ctx
->sock_read_watch
== NULL
) {
267 static void unix_dgram_recv_handler(struct poll_watch
*w
, int fd
, short events
,
270 struct unix_dgram_ctx
*ctx
= (struct unix_dgram_ctx
*)private_data
;
275 size_t bufsize
= msghdr_prep_recv_fds(NULL
, NULL
, 0, INT8_MAX
);
276 uint8_t buf
[bufsize
];
278 iov
= (struct iovec
) {
279 .iov_base
= (void *)ctx
->recv_buf
,
280 .iov_len
= ctx
->max_msg
,
283 msg
= (struct msghdr
) {
288 msghdr_prep_recv_fds(&msg
, buf
, bufsize
, INT8_MAX
);
290 #ifdef MSG_CMSG_CLOEXEC
291 flags
|= MSG_CMSG_CLOEXEC
;
294 received
= recvmsg(fd
, &msg
, flags
);
295 if (received
== -1) {
296 if ((errno
== EAGAIN
) ||
297 (errno
== EWOULDBLOCK
) ||
298 (errno
== EINTR
) || (errno
== ENOMEM
)) {
299 /* Not really an error - just try again. */
302 /* Problem with the socket. Set it unreadable. */
303 ctx
->ev_funcs
->watch_update(w
, 0);
306 if (received
> ctx
->max_msg
) {
307 /* More than we expected, not for us */
312 size_t num_fds
= msghdr_extract_fds(&msg
, NULL
, 0);
316 msghdr_extract_fds(&msg
, fds
, num_fds
);
318 for (i
= 0; i
< num_fds
; i
++) {
321 err
= prepare_socket_cloexec(fds
[i
]);
323 close_fd_array(fds
, num_fds
);
328 ctx
->recv_callback(ctx
, ctx
->recv_buf
, received
,
329 fds
, num_fds
, ctx
->private_data
);
333 static void unix_dgram_job_finished(struct poll_watch
*w
, int fd
, short events
,
336 static int unix_dgram_init_pthreadpool(struct unix_dgram_ctx
*ctx
)
340 if (ctx
->send_pool
!= NULL
) {
344 ret
= pthreadpool_init(0, &ctx
->send_pool
);
349 signalfd
= pthreadpool_signal_fd(ctx
->send_pool
);
351 ctx
->pool_read_watch
= ctx
->ev_funcs
->watch_new(
352 ctx
->ev_funcs
, signalfd
, POLLIN
,
353 unix_dgram_job_finished
, ctx
);
354 if (ctx
->pool_read_watch
== NULL
) {
355 pthreadpool_destroy(ctx
->send_pool
);
356 ctx
->send_pool
= NULL
;
363 static int unix_dgram_send_queue_init(
364 struct unix_dgram_ctx
*ctx
, const struct sockaddr_un
*dst
,
365 struct unix_dgram_send_queue
**result
)
367 struct unix_dgram_send_queue
*q
;
371 pathlen
= strlen(dst
->sun_path
)+1;
373 q
= malloc(offsetof(struct unix_dgram_send_queue
, path
) + pathlen
);
379 memcpy(q
->path
, dst
->sun_path
, pathlen
);
381 q
->sock
= socket(AF_UNIX
, SOCK_DGRAM
, 0);
387 err
= prepare_socket_cloexec(q
->sock
);
393 ret
= connect(q
->sock
,
394 (const struct sockaddr
*)(const void *)dst
,
396 } while ((ret
== -1) && (errno
== EINTR
));
403 err
= unix_dgram_init_pthreadpool(ctx
);
408 DLIST_ADD(ctx
->send_queues
, q
);
420 static void unix_dgram_send_queue_free(struct unix_dgram_send_queue
*q
)
422 struct unix_dgram_ctx
*ctx
= q
->ctx
;
424 while (q
->msgs
!= NULL
) {
425 struct unix_dgram_msg
*msg
;
427 DLIST_REMOVE(q
->msgs
, msg
);
428 close_fd_array_dgram_msg(msg
);
432 DLIST_REMOVE(ctx
->send_queues
, q
);
436 static struct unix_dgram_send_queue
*find_send_queue(
437 struct unix_dgram_ctx
*ctx
, const char *dst_sock
)
439 struct unix_dgram_send_queue
*s
;
441 for (s
= ctx
->send_queues
; s
!= NULL
; s
= s
->next
) {
442 if (strcmp(s
->path
, dst_sock
) == 0) {
449 static int queue_msg(struct unix_dgram_send_queue
*q
,
450 const struct iovec
*iov
, int iovcnt
,
451 const int *fds
, size_t num_fds
)
453 struct unix_dgram_msg
*msg
;
454 struct msghdr_buf
*hdr
;
455 size_t msglen
, needed
;
457 int fds_copy
[MIN(num_fds
, INT8_MAX
)];
460 for (i
=0; i
<num_fds
; i
++) {
464 for (i
= 0; i
< num_fds
; i
++) {
465 fds_copy
[i
] = dup(fds
[i
]);
466 if (fds_copy
[i
] == -1) {
472 msglen
= unix_dgram_msg_size();
474 msghdrlen
= msghdr_copy(NULL
, 0, NULL
, 0, iov
, iovcnt
,
476 if (msghdrlen
== -1) {
481 needed
= msglen
+ msghdrlen
;
482 if (needed
< msglen
) {
487 msg
= malloc(needed
);
492 hdr
= unix_dgram_msghdr(msg
);
495 msghdr_copy(hdr
, msghdrlen
, NULL
, 0, iov
, iovcnt
,
498 DLIST_ADD_END(q
->msgs
, msg
, struct unix_dgram_msg
);
501 close_fd_array(fds_copy
, num_fds
);
505 static void unix_dgram_send_job(void *private_data
)
507 struct unix_dgram_msg
*dmsg
= private_data
;
510 struct msghdr_buf
*hdr
= unix_dgram_msghdr(dmsg
);
511 struct msghdr
*msg
= msghdr_buf_msghdr(hdr
);
512 dmsg
->sent
= sendmsg(dmsg
->sock
, msg
, 0);
513 } while ((dmsg
->sent
== -1) && (errno
== EINTR
));
515 if (dmsg
->sent
== -1) {
516 dmsg
->sys_errno
= errno
;
520 static void unix_dgram_job_finished(struct poll_watch
*w
, int fd
, short events
,
523 struct unix_dgram_ctx
*ctx
= private_data
;
524 struct unix_dgram_send_queue
*q
;
525 struct unix_dgram_msg
*msg
;
528 ret
= pthreadpool_finished_jobs(ctx
->send_pool
, &job
, 1);
533 for (q
= ctx
->send_queues
; q
!= NULL
; q
= q
->next
) {
534 if (job
== q
->sock
) {
540 /* Huh? Should not happen */
545 DLIST_REMOVE(q
->msgs
, msg
);
546 close_fd_array_dgram_msg(msg
);
549 if (q
->msgs
!= NULL
) {
550 ret
= pthreadpool_add_job(ctx
->send_pool
, q
->sock
,
551 unix_dgram_send_job
, q
->msgs
);
557 unix_dgram_send_queue_free(q
);
560 static int unix_dgram_send(struct unix_dgram_ctx
*ctx
,
561 const struct sockaddr_un
*dst
,
562 const struct iovec
*iov
, int iovlen
,
563 const int *fds
, size_t num_fds
)
565 struct unix_dgram_send_queue
*q
;
571 if (num_fds
> INT8_MAX
) {
575 #ifndef HAVE_STRUCT_MSGHDR_MSG_CONTROL
579 #endif /* ! HAVE_STRUCT_MSGHDR_MSG_CONTROL */
581 for (i
= 0; i
< num_fds
; i
++) {
583 * Make sure we only allow fd passing
584 * for communication channels,
585 * e.g. sockets, pipes, fifos, ...
587 ret
= lseek(fds
[i
], 0, SEEK_CUR
);
588 if (ret
== -1 && errno
== ESPIPE
) {
594 * Reject the message as we may need to call dup(),
595 * if we queue the message.
597 * That might result in unexpected behavior for the caller
598 * for files and broken posix locking.
604 * To preserve message ordering, we have to queue a message when
605 * others are waiting in line already.
607 q
= find_send_queue(ctx
, dst
->sun_path
);
609 return queue_msg(q
, iov
, iovlen
, fds
, num_fds
);
613 * Try a cheap nonblocking send
616 msg
= (struct msghdr
) {
617 .msg_name
= discard_const_p(struct sockaddr_un
, dst
),
618 .msg_namelen
= sizeof(*dst
),
619 .msg_iov
= discard_const_p(struct iovec
, iov
),
623 fdlen
= msghdr_prep_fds(&msg
, NULL
, 0, fds
, num_fds
);
630 msghdr_prep_fds(&msg
, buf
, fdlen
, fds
, num_fds
);
632 ret
= sendmsg(ctx
->sock
, &msg
, 0);
638 if ((errno
!= EWOULDBLOCK
) &&
641 /* FreeBSD can give this for large messages */
642 (errno
!= ENOBUFS
) &&
648 ret
= unix_dgram_send_queue_init(ctx
, dst
, &q
);
652 ret
= queue_msg(q
, iov
, iovlen
, fds
, num_fds
);
654 unix_dgram_send_queue_free(q
);
657 ret
= pthreadpool_add_job(ctx
->send_pool
, q
->sock
,
658 unix_dgram_send_job
, q
->msgs
);
660 unix_dgram_send_queue_free(q
);
666 static int unix_dgram_sock(struct unix_dgram_ctx
*ctx
)
671 static int unix_dgram_free(struct unix_dgram_ctx
*ctx
)
673 if (ctx
->send_queues
!= NULL
) {
677 if (ctx
->send_pool
!= NULL
) {
678 int ret
= pthreadpool_destroy(ctx
->send_pool
);
682 ctx
->ev_funcs
->watch_free(ctx
->pool_read_watch
);
685 ctx
->ev_funcs
->watch_free(ctx
->sock_read_watch
);
687 if (getpid() == ctx
->created_pid
) {
688 /* If we created it, unlink. Otherwise someone else might
689 * still have it open */
700 * Every message starts with a uint64_t cookie.
702 * A value of 0 indicates a single-fragment message which is complete in
703 * itself. The data immediately follows the cookie.
705 * Every multi-fragment message has a cookie != 0 and starts with a cookie
706 * followed by a struct unix_msg_header and then the data. The pid and sock
707 * fields are used to assure uniqueness on the receiver side.
710 struct unix_msg_hdr
{
717 struct unix_msg
*prev
, *next
;
726 struct unix_msg_ctx
{
727 struct unix_dgram_ctx
*dgram
;
731 void (*recv_callback
)(struct unix_msg_ctx
*ctx
,
732 uint8_t *msg
, size_t msg_len
,
733 int *fds
, size_t num_fds
,
737 struct unix_msg
*msgs
;
740 static void unix_msg_recv(struct unix_dgram_ctx
*dgram_ctx
,
741 uint8_t *buf
, size_t buflen
,
742 int *fds
, size_t num_fds
,
745 int unix_msg_init(const struct sockaddr_un
*addr
,
746 const struct poll_funcs
*ev_funcs
,
748 void (*recv_callback
)(struct unix_msg_ctx
*ctx
,
749 uint8_t *msg
, size_t msg_len
,
750 int *fds
, size_t num_fds
,
753 struct unix_msg_ctx
**result
)
755 struct unix_msg_ctx
*ctx
;
758 ctx
= malloc(sizeof(*ctx
));
763 *ctx
= (struct unix_msg_ctx
) {
764 .fragment_len
= fragment_len
,
766 .recv_callback
= recv_callback
,
767 .private_data
= private_data
770 ret
= unix_dgram_init(addr
, fragment_len
, ev_funcs
,
771 unix_msg_recv
, ctx
, &ctx
->dgram
);
781 int unix_msg_send(struct unix_msg_ctx
*ctx
, const struct sockaddr_un
*dst
,
782 const struct iovec
*iov
, int iovlen
,
783 const int *fds
, size_t num_fds
)
788 struct iovec iov_copy
[iovlen
+2];
789 struct unix_msg_hdr hdr
;
790 struct iovec src_iov
;
796 msglen
= iov_buflen(iov
, iovlen
);
801 #ifndef HAVE_STRUCT_MSGHDR_MSG_CONTROL
805 #endif /* ! HAVE_STRUCT_MSGHDR_MSG_CONTROL */
807 if (num_fds
> INT8_MAX
) {
811 if (msglen
<= (ctx
->fragment_len
- sizeof(uint64_t))) {
814 iov_copy
[0].iov_base
= &cookie
;
815 iov_copy
[0].iov_len
= sizeof(cookie
);
817 memcpy(&iov_copy
[1], iov
,
818 sizeof(struct iovec
) * iovlen
);
821 return unix_dgram_send(ctx
->dgram
, dst
, iov_copy
, iovlen
+1,
825 hdr
= (struct unix_msg_hdr
) {
828 .sock
= unix_dgram_sock(ctx
->dgram
)
831 iov_copy
[0].iov_base
= &ctx
->cookie
;
832 iov_copy
[0].iov_len
= sizeof(ctx
->cookie
);
833 iov_copy
[1].iov_base
= &hdr
;
834 iov_copy
[1].iov_len
= sizeof(hdr
);
840 * The following write loop sends the user message in pieces. We have
841 * filled the first two iovecs above with "cookie" and "hdr". In the
842 * following loops we pull message chunks from the user iov array and
843 * fill iov_copy piece by piece, possibly truncating chunks from the
844 * caller's iov array. Ugly, but hopefully efficient.
847 while (sent
< msglen
) {
849 size_t iov_index
= 2;
851 fragment_len
= sizeof(ctx
->cookie
) + sizeof(hdr
);
853 while (fragment_len
< ctx
->fragment_len
) {
856 space
= ctx
->fragment_len
- fragment_len
;
857 chunk
= MIN(space
, src_iov
.iov_len
);
859 iov_copy
[iov_index
].iov_base
= src_iov
.iov_base
;
860 iov_copy
[iov_index
].iov_len
= chunk
;
863 src_iov
.iov_base
= (char *)src_iov
.iov_base
+ chunk
;
864 src_iov
.iov_len
-= chunk
;
865 fragment_len
+= chunk
;
867 if (src_iov
.iov_len
== 0) {
876 sent
+= (fragment_len
- sizeof(ctx
->cookie
) - sizeof(hdr
));
879 * only the last fragment should pass the fd array.
880 * That simplifies the receiver a lot.
883 ret
= unix_dgram_send(ctx
->dgram
, dst
,
887 ret
= unix_dgram_send(ctx
->dgram
, dst
,
897 if (ctx
->cookie
== 0) {
904 static void unix_msg_recv(struct unix_dgram_ctx
*dgram_ctx
,
905 uint8_t *buf
, size_t buflen
,
906 int *fds
, size_t num_fds
,
909 struct unix_msg_ctx
*ctx
= (struct unix_msg_ctx
*)private_data
;
910 struct unix_msg_hdr hdr
;
911 struct unix_msg
*msg
;
915 if (buflen
< sizeof(cookie
)) {
919 memcpy(&cookie
, buf
, sizeof(cookie
));
921 buf
+= sizeof(cookie
);
922 buflen
-= sizeof(cookie
);
925 ctx
->recv_callback(ctx
, buf
, buflen
, fds
, num_fds
,
930 if (buflen
< sizeof(hdr
)) {
933 memcpy(&hdr
, buf
, sizeof(hdr
));
936 buflen
-= sizeof(hdr
);
938 for (msg
= ctx
->msgs
; msg
!= NULL
; msg
= msg
->next
) {
939 if ((msg
->sender_pid
== hdr
.pid
) &&
940 (msg
->sender_sock
== hdr
.sock
)) {
945 if ((msg
!= NULL
) && (msg
->cookie
!= cookie
)) {
946 DLIST_REMOVE(ctx
->msgs
, msg
);
952 msg
= malloc(offsetof(struct unix_msg
, buf
) + hdr
.msglen
);
956 *msg
= (struct unix_msg
) {
957 .msglen
= hdr
.msglen
,
958 .sender_pid
= hdr
.pid
,
959 .sender_sock
= hdr
.sock
,
962 DLIST_ADD(ctx
->msgs
, msg
);
965 space
= msg
->msglen
- msg
->received
;
966 if (buflen
> space
) {
970 memcpy(msg
->buf
+ msg
->received
, buf
, buflen
);
971 msg
->received
+= buflen
;
973 if (msg
->received
< msg
->msglen
) {
977 DLIST_REMOVE(ctx
->msgs
, msg
);
978 ctx
->recv_callback(ctx
, msg
->buf
, msg
->msglen
, fds
, num_fds
,
984 close_fd_array(fds
, num_fds
);
987 int unix_msg_free(struct unix_msg_ctx
*ctx
)
991 ret
= unix_dgram_free(ctx
->dgram
);
996 while (ctx
->msgs
!= NULL
) {
997 struct unix_msg
*msg
= ctx
->msgs
;
998 DLIST_REMOVE(ctx
->msgs
, msg
);