/*
 * Unix SMB/CIFS implementation.
 * Copyright (C) Volker Lendecke 2013
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
21 #include "system/select.h"
22 #include "system/time.h"
23 #include "system/network.h"
24 #include "dlinklist.h"
25 #include "pthreadpool/pthreadpool.h"
/*
 * This file implements two abstractions: The "unix_dgram" functions implement
 * queueing for unix domain datagram sockets. You can send to a destination
 * socket, and if that has no free space available, it will fall back to an
 * anonymous socket that will poll for writability. "unix_dgram" expects the
 * data size not to exceed the system limit.
 *
 * The "unix_msg" functions implement the fragmentation of large messages on
 * top of "unix_dgram". This is what is exposed to the user of this API.
 */
/*
 * One queued datagram, allocated in a single chunk together with its
 * payload. Created by queue_msg(), transmitted by unix_dgram_send_job()
 * on a pthreadpool worker, freed in unix_dgram_job_finished().
 *
 * NOTE(review): reassembled from fragmented source text; field set is
 * grounded in the uses visible in this file (msg->sock, msg->sent,
 * msg->buflen, msg->buf) — verify against upstream.
 */
struct unix_dgram_msg {
	struct unix_dgram_msg *prev, *next;

	int sock;		/* destination socket fd (owned by the queue) */
	ssize_t sent;		/* send() result, written by the worker thread */
	size_t buflen;		/* number of payload bytes in buf[] */
	uint8_t buf[1];		/* payload; allocated past the struct */
};
/*
 * Per-destination queue of pending datagrams, created when a cheap
 * nonblocking send to that destination would block. Holds one
 * SOCK_DGRAM socket connected to "path" so the worker thread can use
 * plain send().
 *
 * NOTE(review): reassembled from fragmented source text; "sock" and the
 * trailing "path" member are grounded in visible uses (q->sock, q->path).
 */
struct unix_dgram_send_queue {
	struct unix_dgram_send_queue *prev, *next;
	struct unix_dgram_ctx *ctx;
	int sock;			/* connected to "path" */
	struct unix_dgram_msg *msgs;	/* pending messages, FIFO order */
	char path[1];			/* destination path; allocated past the struct */
};
/*
 * State for one bound unix domain datagram socket, with async receive
 * via the poll_funcs abstraction and async send via a pthreadpool.
 *
 * NOTE(review): reassembled from fragmented source text; fields not
 * visible in the struct fragment (sock, created_pid, max_msg, recv_buf,
 * private_data, path) are grounded in their uses elsewhere in this file.
 */
struct unix_dgram_ctx {
	int sock;
	pid_t created_pid;	/* pid that bound the socket; only it unlinks on free */
	const struct poll_funcs *ev_funcs;
	size_t max_msg;		/* maximum datagram size we receive */

	void (*recv_callback)(struct unix_dgram_ctx *ctx,
			      uint8_t *msg, size_t msg_len,
			      void *private_data);
	void *private_data;

	struct poll_watch *sock_read_watch;
	struct unix_dgram_send_queue *send_queues;

	struct pthreadpool *send_pool;
	struct poll_watch *pool_read_watch;

	uint8_t *recv_buf;	/* max_msg bytes, reused for every receive */
	char path[1];		/* own socket path; allocated past the struct */
};
/* Forward declarations */
static ssize_t iov_buflen(const struct iovec *iov, int iovlen);
static void unix_dgram_recv_handler(struct poll_watch *w, int fd, short events,
				    void *private_data);
/*
 * Set socket non blocking.
 * Returns 0 on success, errno on fcntl failure.
 */
static int prepare_socket_nonblock(int sock)
{
	int flags;
#ifdef O_NONBLOCK
#define FLAG_TO_SET O_NONBLOCK
#else
#ifdef SYSV
#define FLAG_TO_SET O_NDELAY
#else
#define FLAG_TO_SET FNDELAY
#endif
#endif

	flags = fcntl(sock, F_GETFL);
	if (flags == -1) {
		return errno;
	}
	flags |= FLAG_TO_SET;
	if (fcntl(sock, F_SETFL, flags) == -1) {
		return errno;
	}
#undef FLAG_TO_SET
	return 0;
}
/*
 * Set socket close on exec.
 * Returns 0 on success (or if FD_CLOEXEC is unavailable), errno on
 * fcntl failure.
 */
static int prepare_socket_cloexec(int sock)
{
#ifdef FD_CLOEXEC
	int flags;

	flags = fcntl(sock, F_GETFD, 0);
	if (flags == -1) {
		return errno;
	}
	flags |= FD_CLOEXEC;
	if (fcntl(sock, F_SETFD, flags) == -1) {
		return errno;
	}
#endif
	return 0;
}
/*
 * Set socket non blocking and close on exec.
 * Returns 0 on success, errno from the first failing step otherwise.
 */
static int prepare_socket(int sock)
{
	int ret = prepare_socket_nonblock(sock);

	if (ret) {
		return ret;
	}
	return prepare_socket_cloexec(sock);
}
138 static int unix_dgram_init(const char *path
, size_t max_msg
,
139 const struct poll_funcs
*ev_funcs
,
140 void (*recv_callback
)(struct unix_dgram_ctx
*ctx
,
141 uint8_t *msg
, size_t msg_len
,
144 struct unix_dgram_ctx
**result
)
146 struct unix_dgram_ctx
*ctx
;
147 struct sockaddr_un addr
= { 0, };
152 pathlen
= strlen(path
)+1;
153 if (pathlen
> sizeof(addr
.sun_path
)) {
160 ctx
= malloc(offsetof(struct unix_dgram_ctx
, path
) + pathlen
);
165 memcpy(ctx
->path
, path
, pathlen
);
170 ctx
->recv_buf
= malloc(max_msg
);
171 if (ctx
->recv_buf
== NULL
) {
175 ctx
->max_msg
= max_msg
;
176 ctx
->ev_funcs
= ev_funcs
;
177 ctx
->recv_callback
= recv_callback
;
178 ctx
->private_data
= private_data
;
179 ctx
->sock_read_watch
= NULL
;
180 ctx
->send_pool
= NULL
;
181 ctx
->pool_read_watch
= NULL
;
182 ctx
->send_queues
= NULL
;
183 ctx
->created_pid
= (pid_t
)-1;
185 ctx
->sock
= socket(AF_UNIX
, SOCK_DGRAM
, 0);
186 if (ctx
->sock
== -1) {
191 /* Set non-blocking and close-on-exec. */
192 ret
= prepare_socket(ctx
->sock
);
198 addr
.sun_family
= AF_UNIX
;
199 memcpy(addr
.sun_path
, path
, pathlen
);
201 ret
= bind(ctx
->sock
, (struct sockaddr
*)(void *)&addr
,
208 ctx
->created_pid
= getpid();
210 ctx
->sock_read_watch
= ctx
->ev_funcs
->watch_new(
211 ctx
->ev_funcs
, ctx
->sock
, POLLIN
,
212 unix_dgram_recv_handler
, ctx
);
214 if (ctx
->sock_read_watch
== NULL
) {
231 static void unix_dgram_recv_handler(struct poll_watch
*w
, int fd
, short events
,
234 struct unix_dgram_ctx
*ctx
= (struct unix_dgram_ctx
*)private_data
;
239 iov
= (struct iovec
) {
240 .iov_base
= (void *)ctx
->recv_buf
,
241 .iov_len
= ctx
->max_msg
,
244 msg
= (struct msghdr
) {
247 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
253 received
= recvmsg(fd
, &msg
, 0);
254 if (received
== -1) {
255 if ((errno
== EAGAIN
) ||
257 (errno
== EWOULDBLOCK
) ||
259 (errno
== EINTR
) || (errno
== ENOMEM
)) {
260 /* Not really an error - just try again. */
263 /* Problem with the socket. Set it unreadable. */
264 ctx
->ev_funcs
->watch_update(w
, 0);
267 if (received
> ctx
->max_msg
) {
268 /* More than we expected, not for us */
271 ctx
->recv_callback(ctx
, ctx
->recv_buf
, received
, ctx
->private_data
);
/* Forward declaration: completion handler for the send thread pool. */
static void unix_dgram_job_finished(struct poll_watch *w, int fd, short events,
				    void *private_data);
277 static int unix_dgram_init_pthreadpool(struct unix_dgram_ctx
*ctx
)
281 if (ctx
->send_pool
!= NULL
) {
285 ret
= pthreadpool_init(0, &ctx
->send_pool
);
290 signalfd
= pthreadpool_signal_fd(ctx
->send_pool
);
292 ctx
->pool_read_watch
= ctx
->ev_funcs
->watch_new(
293 ctx
->ev_funcs
, signalfd
, POLLIN
,
294 unix_dgram_job_finished
, ctx
);
295 if (ctx
->pool_read_watch
== NULL
) {
296 pthreadpool_destroy(ctx
->send_pool
);
297 ctx
->send_pool
= NULL
;
304 static int unix_dgram_send_queue_init(
305 struct unix_dgram_ctx
*ctx
, const char *path
,
306 struct unix_dgram_send_queue
**result
)
308 struct unix_dgram_send_queue
*q
;
309 struct sockaddr_un addr
= { 0, };
313 pathlen
= strlen(path
)+1;
315 if (pathlen
> sizeof(addr
.sun_path
)) {
319 q
= malloc(offsetof(struct unix_dgram_send_queue
, path
) + pathlen
);
325 memcpy(q
->path
, path
, pathlen
);
327 q
->sock
= socket(AF_UNIX
, SOCK_DGRAM
, 0);
333 err
= prepare_socket_cloexec(q
->sock
);
338 addr
.sun_family
= AF_UNIX
;
339 memcpy(addr
.sun_path
, path
, pathlen
+1);
342 ret
= connect(q
->sock
, (struct sockaddr
*)&addr
, sizeof(addr
));
343 } while ((ret
== -1) && (errno
== EINTR
));
350 err
= unix_dgram_init_pthreadpool(ctx
);
355 DLIST_ADD(ctx
->send_queues
, q
);
367 static void unix_dgram_send_queue_free(struct unix_dgram_send_queue
*q
)
369 struct unix_dgram_ctx
*ctx
= q
->ctx
;
371 while (q
->msgs
!= NULL
) {
372 struct unix_dgram_msg
*msg
;
374 DLIST_REMOVE(q
->msgs
, msg
);
378 DLIST_REMOVE(ctx
->send_queues
, q
);
382 static struct unix_dgram_send_queue
*find_send_queue(
383 struct unix_dgram_ctx
*ctx
, const char *dst_sock
)
385 struct unix_dgram_send_queue
*s
;
387 for (s
= ctx
->send_queues
; s
!= NULL
; s
= s
->next
) {
388 if (strcmp(s
->path
, dst_sock
) == 0) {
395 static int queue_msg(struct unix_dgram_send_queue
*q
,
396 const struct iovec
*iov
, int iovlen
)
398 struct unix_dgram_msg
*msg
;
403 buflen
= iov_buflen(iov
, iovlen
);
408 msglen
= offsetof(struct unix_dgram_msg
, buf
) + buflen
;
409 if ((msglen
< buflen
) ||
410 (msglen
< offsetof(struct unix_dgram_msg
, buf
))) {
415 msg
= malloc(msglen
);
419 msg
->buflen
= buflen
;
423 for (i
=0; i
<iovlen
; i
++) {
424 memcpy(&msg
->buf
[buflen
], iov
[i
].iov_base
, iov
[i
].iov_len
);
425 buflen
+= iov
[i
].iov_len
;
428 DLIST_ADD_END(q
->msgs
, msg
, struct unix_dgram_msg
);
432 static void unix_dgram_send_job(void *private_data
)
434 struct unix_dgram_msg
*msg
= private_data
;
437 msg
->sent
= send(msg
->sock
, msg
->buf
, msg
->buflen
, 0);
438 } while ((msg
->sent
== -1) && (errno
== EINTR
));
441 static void unix_dgram_job_finished(struct poll_watch
*w
, int fd
, short events
,
444 struct unix_dgram_ctx
*ctx
= private_data
;
445 struct unix_dgram_send_queue
*q
;
446 struct unix_dgram_msg
*msg
;
449 ret
= pthreadpool_finished_jobs(ctx
->send_pool
, &job
, 1);
454 for (q
= ctx
->send_queues
; q
!= NULL
; q
= q
->next
) {
455 if (job
== q
->sock
) {
461 /* Huh? Should not happen */
466 DLIST_REMOVE(q
->msgs
, msg
);
469 if (q
->msgs
!= NULL
) {
470 ret
= pthreadpool_add_job(ctx
->send_pool
, q
->sock
,
471 unix_dgram_send_job
, q
->msgs
);
477 unix_dgram_send_queue_free(q
);
480 static int unix_dgram_send(struct unix_dgram_ctx
*ctx
, const char *dst_sock
,
481 const struct iovec
*iov
, int iovlen
)
483 struct unix_dgram_send_queue
*q
;
484 struct sockaddr_un addr
= { 0, };
489 dst_len
= strlen(dst_sock
);
490 if (dst_len
>= sizeof(addr
.sun_path
)) {
495 * To preserve message ordering, we have to queue a message when
496 * others are waiting in line already.
498 q
= find_send_queue(ctx
, dst_sock
);
500 return queue_msg(q
, iov
, iovlen
);
504 * Try a cheap nonblocking send
507 addr
.sun_family
= AF_UNIX
;
508 memcpy(addr
.sun_path
, dst_sock
, dst_len
);
510 msg
.msg_name
= &addr
;
511 msg
.msg_namelen
= sizeof(addr
);
512 msg
.msg_iov
= discard_const_p(struct iovec
, iov
);
513 msg
.msg_iovlen
= iovlen
;
514 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
515 msg
.msg_control
= NULL
;
516 msg
.msg_controllen
= 0;
520 ret
= sendmsg(ctx
->sock
, &msg
, 0);
525 if ((errno
!= EWOULDBLOCK
) && (errno
!= EAGAIN
) && (errno
!= EINTR
)) {
527 if ((errno
!= EAGAIN
) && (errno
!= EINTR
)) {
532 ret
= unix_dgram_send_queue_init(ctx
, dst_sock
, &q
);
536 ret
= queue_msg(q
, iov
, iovlen
);
538 unix_dgram_send_queue_free(q
);
541 ret
= pthreadpool_add_job(ctx
->send_pool
, q
->sock
,
542 unix_dgram_send_job
, q
->msgs
);
544 unix_dgram_send_queue_free(q
);
550 static int unix_dgram_sock(struct unix_dgram_ctx
*ctx
)
555 static int unix_dgram_free(struct unix_dgram_ctx
*ctx
)
557 if (ctx
->send_queues
!= NULL
) {
561 if (ctx
->send_pool
!= NULL
) {
562 int ret
= pthreadpool_destroy(ctx
->send_pool
);
566 ctx
->ev_funcs
->watch_free(ctx
->pool_read_watch
);
569 ctx
->ev_funcs
->watch_free(ctx
->sock_read_watch
);
571 if (getpid() == ctx
->created_pid
) {
572 /* If we created it, unlink. Otherwise someone else might
573 * still have it open */
/*
 * Every message starts with a uint64_t cookie.
 *
 * A value of 0 indicates a single-fragment message which is complete in
 * itself. The data immediately follows the cookie.
 *
 * Every multi-fragment message has a cookie != 0 and starts with a cookie
 * followed by a struct unix_msg_header and then the data. The pid and sock
 * fields are used to assure uniqueness on the receiver side.
 */
/*
 * Fragment header that follows the cookie in every multi-fragment
 * datagram.
 * NOTE(review): field sets reassembled from fragmented source text and
 * grounded in the uses in unix_msg_send()/unix_msg_recv() (hdr.msglen,
 * hdr.pid, hdr.sock; msg->received, msg->sender_*, msg->cookie,
 * msg->buf).
 */
struct unix_msg_hdr {
	size_t msglen;	/* total user payload length of the whole message */
	pid_t pid;	/* sender pid: uniqueness on the receiver side */
	int sock;	/* sender socket fd: uniqueness on the receiver side */
};

/* One partially reassembled incoming multi-fragment message. */
struct unix_msg {
	struct unix_msg *prev, *next;
	size_t msglen;
	size_t received;	/* bytes of buf[] filled so far */
	pid_t sender_pid;
	int sender_sock;
	uint64_t cookie;
	uint8_t buf[1];		/* msglen bytes, allocated past the struct */
};
/*
 * User-visible context: fragmentation/reassembly layered on top of a
 * unix_dgram_ctx.
 * NOTE(review): fields not visible in the fragment (fragment_len,
 * cookie, private_data) are grounded in their uses in this file.
 */
struct unix_msg_ctx {
	struct unix_dgram_ctx *dgram;
	size_t fragment_len;	/* max size of one datagram fragment */
	uint64_t cookie;	/* next outgoing multi-fragment cookie, never 0 */

	void (*recv_callback)(struct unix_msg_ctx *ctx,
			      uint8_t *msg, size_t msg_len,
			      void *private_data);
	void *private_data;

	struct unix_msg *msgs;	/* messages currently being reassembled */
};
/* Forward declaration: dgram-level receive callback doing reassembly. */
static void unix_msg_recv(struct unix_dgram_ctx *ctx,
			  uint8_t *msg, size_t msg_len,
			  void *private_data);
627 int unix_msg_init(const char *path
, const struct poll_funcs
*ev_funcs
,
628 size_t fragment_len
, uint64_t cookie
,
629 void (*recv_callback
)(struct unix_msg_ctx
*ctx
,
630 uint8_t *msg
, size_t msg_len
,
633 struct unix_msg_ctx
**result
)
635 struct unix_msg_ctx
*ctx
;
638 ctx
= malloc(sizeof(*ctx
));
643 ret
= unix_dgram_init(path
, fragment_len
, ev_funcs
,
644 unix_msg_recv
, ctx
, &ctx
->dgram
);
650 ctx
->fragment_len
= fragment_len
;
651 ctx
->cookie
= cookie
;
652 ctx
->recv_callback
= recv_callback
;
653 ctx
->private_data
= private_data
;
660 int unix_msg_send(struct unix_msg_ctx
*ctx
, const char *dst_sock
,
661 const struct iovec
*iov
, int iovlen
)
666 struct iovec
*iov_copy
;
667 struct unix_msg_hdr hdr
;
668 struct iovec src_iov
;
674 msglen
= iov_buflen(iov
, iovlen
);
679 if (msglen
<= (ctx
->fragment_len
- sizeof(uint64_t))) {
680 struct iovec tmp_iov
[iovlen
+1];
683 tmp_iov
[0].iov_base
= &cookie
;
684 tmp_iov
[0].iov_len
= sizeof(cookie
);
686 memcpy(&tmp_iov
[1], iov
,
687 sizeof(struct iovec
) * iovlen
);
690 return unix_dgram_send(ctx
->dgram
, dst_sock
, tmp_iov
,
696 hdr
.sock
= unix_dgram_sock(ctx
->dgram
);
698 iov_copy
= malloc(sizeof(struct iovec
) * (iovlen
+ 2));
699 if (iov_copy
== NULL
) {
702 iov_copy
[0].iov_base
= &ctx
->cookie
;
703 iov_copy
[0].iov_len
= sizeof(ctx
->cookie
);
704 iov_copy
[1].iov_base
= &hdr
;
705 iov_copy
[1].iov_len
= sizeof(hdr
);
711 * The following write loop sends the user message in pieces. We have
712 * filled the first two iovecs above with "cookie" and "hdr". In the
713 * following loops we pull message chunks from the user iov array and
714 * fill iov_copy piece by piece, possibly truncating chunks from the
715 * caller's iov array. Ugly, but hopefully efficient.
718 while (sent
< msglen
) {
720 size_t iov_index
= 2;
722 fragment_len
= sizeof(ctx
->cookie
) + sizeof(hdr
);
724 while (fragment_len
< ctx
->fragment_len
) {
727 space
= ctx
->fragment_len
- fragment_len
;
728 chunk
= MIN(space
, src_iov
.iov_len
);
730 iov_copy
[iov_index
].iov_base
= src_iov
.iov_base
;
731 iov_copy
[iov_index
].iov_len
= chunk
;
734 src_iov
.iov_base
= (char *)src_iov
.iov_base
+ chunk
;
735 src_iov
.iov_len
-= chunk
;
736 fragment_len
+= chunk
;
738 if (src_iov
.iov_len
== 0) {
747 sent
+= (fragment_len
- sizeof(ctx
->cookie
) - sizeof(hdr
));
749 ret
= unix_dgram_send(ctx
->dgram
, dst_sock
,
750 iov_copy
, iov_index
);
759 if (ctx
->cookie
== 0) {
766 static void unix_msg_recv(struct unix_dgram_ctx
*dgram_ctx
,
767 uint8_t *buf
, size_t buflen
,
770 struct unix_msg_ctx
*ctx
= (struct unix_msg_ctx
*)private_data
;
771 struct unix_msg_hdr hdr
;
772 struct unix_msg
*msg
;
776 if (buflen
< sizeof(cookie
)) {
779 memcpy(&cookie
, buf
, sizeof(cookie
));
781 buf
+= sizeof(cookie
);
782 buflen
-= sizeof(cookie
);
785 ctx
->recv_callback(ctx
, buf
, buflen
, ctx
->private_data
);
789 if (buflen
< sizeof(hdr
)) {
792 memcpy(&hdr
, buf
, sizeof(hdr
));
795 buflen
-= sizeof(hdr
);
797 for (msg
= ctx
->msgs
; msg
!= NULL
; msg
= msg
->next
) {
798 if ((msg
->sender_pid
== hdr
.pid
) &&
799 (msg
->sender_sock
== hdr
.sock
)) {
804 if ((msg
!= NULL
) && (msg
->cookie
!= cookie
)) {
805 DLIST_REMOVE(ctx
->msgs
, msg
);
811 msg
= malloc(offsetof(struct unix_msg
, buf
) + hdr
.msglen
);
815 msg
->msglen
= hdr
.msglen
;
817 msg
->sender_pid
= hdr
.pid
;
818 msg
->sender_sock
= hdr
.sock
;
819 msg
->cookie
= cookie
;
820 DLIST_ADD(ctx
->msgs
, msg
);
823 space
= msg
->msglen
- msg
->received
;
824 if (buflen
> space
) {
828 memcpy(msg
->buf
+ msg
->received
, buf
, buflen
);
829 msg
->received
+= buflen
;
831 if (msg
->received
< msg
->msglen
) {
835 DLIST_REMOVE(ctx
->msgs
, msg
);
836 ctx
->recv_callback(ctx
, msg
->buf
, msg
->msglen
, ctx
->private_data
);
840 int unix_msg_free(struct unix_msg_ctx
*ctx
)
844 ret
= unix_dgram_free(ctx
->dgram
);
849 while (ctx
->msgs
!= NULL
) {
850 struct unix_msg
*msg
= ctx
->msgs
;
851 DLIST_REMOVE(ctx
->msgs
, msg
);
859 static ssize_t
iov_buflen(const struct iovec
*iov
, int iovlen
)
864 for (i
=0; i
<iovlen
; i
++) {
865 size_t thislen
= iov
[i
].iov_len
;
866 size_t tmp
= buflen
+ thislen
;
868 if ((tmp
< buflen
) || (tmp
< thislen
)) {