2 * Unix SMB/CIFS implementation.
3 * Copyright (C) Volker Lendecke 2013
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
21 #include "system/select.h"
22 #include "system/time.h"
23 #include "system/network.h"
24 #include "dlinklist.h"
25 #include "pthreadpool/pthreadpool.h"
29 * This file implements two abstractions: The "unix_dgram" functions implement
30 * queueing for unix domain datagram sockets. You can send to a destination
31 * socket, and if that has no free space available, it will fall back to an
32 * anonymous socket that will poll for writability. "unix_dgram" expects the
33 * data size not to exceed the system limit.
35 * The "unix_msg" functions implement the fragmentation of large messages on
36 * top of "unix_dgram". This is what is exposed to the user of this API.
/*
 * One datagram waiting to be sent. prev/next chain it into the "msgs"
 * list of a struct unix_dgram_send_queue. Other members used in this file
 * (sock, sent, buflen, buf) are declared on lines not visible in this view.
 */
39 struct unix_dgram_msg
{
40 struct unix_dgram_msg
*prev
, *next
;
/*
 * Per-destination queue of pending datagrams. Queues are chained via
 * prev/next into ctx->send_queues. The sock and path members used
 * elsewhere in this file are declared on lines not visible in this view.
 */
49 struct unix_dgram_send_queue
{
50 struct unix_dgram_send_queue
*prev
, *next
;
/* Owning datagram context. */
51 struct unix_dgram_ctx
*ctx
;
/* Messages still to be sent on this queue, in order. */
53 struct unix_dgram_msg
*msgs
;
/*
 * State of one bound unix datagram socket. Further members used in this
 * file (sock, created_pid, max_msg, recv_buf, private_data, path) are
 * declared on lines not visible in this view.
 */
57 struct unix_dgram_ctx
{
/* Event-loop callbacks (watch_new / watch_update / watch_free). */
60 const struct poll_funcs
*ev_funcs
;
/* Invoked for every datagram received; parameter list continues on a
 * line not visible here. */
63 void (*recv_callback
)(struct unix_dgram_ctx
*ctx
,
64 uint8_t *msg
, size_t msg_len
,
/* POLLIN watch on the bound socket. */
68 struct poll_watch
*sock_read_watch
;
/* Destinations that currently have queued messages. */
69 struct unix_dgram_send_queue
*send_queues
;
/* Thread pool used for queued sends, created on demand. */
71 struct pthreadpool
*send_pool
;
/* POLLIN watch on the thread pool's signal fd. */
72 struct poll_watch
*pool_read_watch
;
/* Forward declarations: helper computing the length of an iovec array,
 * and the poll callback registered for the bound socket (its parameter
 * list continues on a line not visible in this view). */
78 static ssize_t
iov_buflen(const struct iovec
*iov
, int iovlen
);
79 static void unix_dgram_recv_handler(struct poll_watch
*w
, int fd
, short events
,
82 /* Set socket non blocking. */
/*
 * Reads the file status flags with F_GETFL, ORs in FLAG_TO_SET and writes
 * them back with F_SETFL. The return statements are not visible in this
 * view; prepare_socket() treats the result as an int status.
 */
83 static int prepare_socket_nonblock(int sock
)
/* FLAG_TO_SET is one of three spellings of the non-blocking flag; the
 * preprocessor conditionals choosing between them are not visible here. */
87 #define FLAG_TO_SET O_NONBLOCK
90 #define FLAG_TO_SET O_NDELAY
92 #define FLAG_TO_SET FNDELAY
96 flags
= fcntl(sock
, F_GETFL
);
100 flags
|= FLAG_TO_SET
;
101 if (fcntl(sock
, F_SETFL
, flags
) == -1) {
109 /* Set socket close on exec. */
/*
 * Reads the descriptor flags with F_GETFD and writes them back with
 * F_SETFD. The line that adds the close-on-exec bit and the return
 * statements are not visible in this view.
 */
110 static int prepare_socket_cloexec(int sock
)
115 flags
= fcntl(sock
, F_GETFD
, 0);
120 if (fcntl(sock
, F_SETFD
, flags
) == -1) {
127 /* Set socket non blocking and close on exec. */
/*
 * Runs prepare_socket_nonblock() first and, on the visible path, returns
 * the result of prepare_socket_cloexec(). The check of "ret" between the
 * two calls is not visible in this view.
 */
128 static int prepare_socket(int sock
)
130 int ret
= prepare_socket_nonblock(sock
);
135 return prepare_socket_cloexec(sock
);
/*
 * Create a unix_dgram_ctx bound to the AF_UNIX datagram socket "path".
 *
 * Visible steps: reject a path that does not fit into sun_path, allocate
 * the context with the path stored at its end, allocate a receive buffer
 * of max_msg bytes, create the socket, make it non-blocking and
 * close-on-exec, bind it, remember the creating pid and register a POLLIN
 * watch that dispatches to unix_dgram_recv_handler.
 *
 * Error paths, the private_data parameter declaration and the return
 * statements are on lines not visible in this view.
 */
138 static int unix_dgram_init(const char *path
, size_t max_msg
,
139 const struct poll_funcs
*ev_funcs
,
140 void (*recv_callback
)(struct unix_dgram_ctx
*ctx
,
141 uint8_t *msg
, size_t msg_len
,
144 struct unix_dgram_ctx
**result
)
146 struct unix_dgram_ctx
*ctx
;
147 struct sockaddr_un addr
= { 0, };
/* pathlen includes the terminating NUL. */
152 pathlen
= strlen(path
)+1;
153 if (pathlen
> sizeof(addr
.sun_path
)) {
/* Allocation is sized so that the path fits behind the fixed members. */
160 ctx
= malloc(offsetof(struct unix_dgram_ctx
, path
) + pathlen
);
165 memcpy(ctx
->path
, path
, pathlen
);
/* Receive buffer for one datagram of at most max_msg bytes. */
170 ctx
->recv_buf
= malloc(max_msg
);
171 if (ctx
->recv_buf
== NULL
) {
175 ctx
->max_msg
= max_msg
;
176 ctx
->ev_funcs
= ev_funcs
;
177 ctx
->recv_callback
= recv_callback
;
178 ctx
->private_data
= private_data
;
179 ctx
->sock_read_watch
= NULL
;
180 ctx
->send_pool
= NULL
;
181 ctx
->pool_read_watch
= NULL
;
182 ctx
->send_queues
= NULL
;
/* Stays -1 until after the bind() call below; unix_dgram_free() compares
 * it with getpid() before its "if we created it, unlink" step. */
183 ctx
->created_pid
= (pid_t
)-1;
185 ctx
->sock
= socket(AF_UNIX
, SOCK_DGRAM
, 0);
186 if (ctx
->sock
== -1) {
191 /* Set non-blocking and close-on-exec. */
192 ret
= prepare_socket(ctx
->sock
);
198 addr
.sun_family
= AF_UNIX
;
/* Safe: pathlen <= sizeof(addr.sun_path) was checked above. */
199 memcpy(addr
.sun_path
, path
, pathlen
);
201 ret
= bind(ctx
->sock
, (struct sockaddr
*)(void *)&addr
,
208 ctx
->created_pid
= getpid();
/* Watch the bound socket for incoming datagrams. */
210 ctx
->sock_read_watch
= ctx
->ev_funcs
->watch_new(
211 ctx
->ev_funcs
, ctx
->sock
, POLLIN
,
212 unix_dgram_recv_handler
, ctx
);
214 if (ctx
->sock_read_watch
== NULL
) {
/*
 * Poll callback for the bound socket. Reads one datagram into
 * ctx->recv_buf with recvmsg() and passes it to ctx->recv_callback.
 *
 * On recvmsg() failure, EAGAIN/EWOULDBLOCK/EINTR/ENOMEM are treated as
 * transient; for any other error the watch is updated with an event mask
 * of 0. The remaining msghdr initializers and the return statements are
 * not visible in this view.
 */
231 static void unix_dgram_recv_handler(struct poll_watch
*w
, int fd
, short events
,
234 struct unix_dgram_ctx
*ctx
= (struct unix_dgram_ctx
*)private_data
;
/* Single iovec covering the whole receive buffer. */
239 iov
= (struct iovec
) {
240 .iov_base
= (void *)ctx
->recv_buf
,
241 .iov_len
= ctx
->max_msg
,
244 msg
= (struct msghdr
) {
251 received
= recvmsg(fd
, &msg
, 0);
252 if (received
== -1) {
253 if ((errno
== EAGAIN
) ||
255 (errno
== EWOULDBLOCK
) ||
257 (errno
== EINTR
) || (errno
== ENOMEM
)) {
258 /* Not really an error - just try again. */
261 /* Problem with the socket. Set it unreadable. */
262 ctx
->ev_funcs
->watch_update(w
, 0);
265 if (received
> ctx
->max_msg
) {
266 /* More than we expected, not for us */
/* Hand the datagram to the upper layer. */
269 ctx
->recv_callback(ctx
, ctx
->recv_buf
, received
, ctx
->private_data
);
/* Forward declaration of the poll callback that unix_dgram_init_pthreadpool()
 * registers on the thread pool's signal fd; the parameter list continues on
 * a line not visible in this view. */
272 static void unix_dgram_job_finished(struct poll_watch
*w
, int fd
, short events
,
/*
 * Set up ctx->send_pool on first use: initialise the pool, fetch its
 * signal fd and watch that fd for POLLIN with unix_dgram_job_finished.
 * If the watch cannot be created the pool is destroyed again and
 * ctx->send_pool is reset to NULL.
 *
 * The body of the "pool already exists" branch and the return statements
 * are not visible in this view.
 */
275 static int unix_dgram_init_pthreadpool(struct unix_dgram_ctx
*ctx
)
/* Pool already present. */
279 if (ctx
->send_pool
!= NULL
) {
283 ret
= pthreadpool_init(0, &ctx
->send_pool
);
288 signalfd
= pthreadpool_signal_fd(ctx
->send_pool
);
290 ctx
->pool_read_watch
= ctx
->ev_funcs
->watch_new(
291 ctx
->ev_funcs
, signalfd
, POLLIN
,
292 unix_dgram_job_finished
, ctx
);
/* Roll back the pool when the watch could not be registered. */
293 if (ctx
->pool_read_watch
== NULL
) {
294 pthreadpool_destroy(ctx
->send_pool
);
295 ctx
->send_pool
= NULL
;
/*
 * Create a send queue for destination socket "path" and link it into
 * ctx->send_queues.
 *
 * Visible steps: reject a path that does not fit into sun_path, allocate
 * the queue with the path stored at its end, create a close-on-exec
 * AF_UNIX datagram socket, connect() it to the destination (retrying on
 * EINTR), make sure the send thread pool exists and add the queue to the
 * context's list.
 *
 * Error paths and return statements are on lines not visible in this
 * view and are left untouched.
 */
302 static int unix_dgram_send_queue_init(
303 struct unix_dgram_ctx
*ctx
, const char *path
,
304 struct unix_dgram_send_queue
**result
)
306 struct unix_dgram_send_queue
*q
;
307 struct sockaddr_un addr
= { 0, };
/* pathlen includes the terminating NUL. */
311 pathlen
= strlen(path
)+1;
313 if (pathlen
> sizeof(addr
.sun_path
)) {
317 q
= malloc(offsetof(struct unix_dgram_send_queue
, path
) + pathlen
);
323 memcpy(q
->path
, path
, pathlen
);
325 q
->sock
= socket(AF_UNIX
, SOCK_DGRAM
, 0);
331 err
= prepare_socket_cloexec(q
->sock
);
336 addr
.sun_family
= AF_UNIX
;
/*
 * Copy exactly pathlen bytes: pathlen already counts the NUL, so the
 * former "pathlen+1" read one byte beyond the end of "path" and could
 * write one byte beyond addr.sun_path when the path exactly fills it.
 */
337 memcpy(addr
.sun_path
, path
, pathlen
);
340 ret
= connect(q
->sock
, (struct sockaddr
*)&addr
, sizeof(addr
));
341 } while ((ret
== -1) && (errno
== EINTR
));
/* Queued sends run on the thread pool; create it if necessary. */
348 err
= unix_dgram_init_pthreadpool(ctx
);
353 DLIST_ADD(ctx
->send_queues
, q
);
/*
 * Tear down a send queue: remove every message still on q->msgs, then
 * unlink the queue from its context's send_queues list. The statements
 * that pick the message, release memory and close the socket are not
 * visible in this view.
 */
365 static void unix_dgram_send_queue_free(struct unix_dgram_send_queue
*q
)
367 struct unix_dgram_ctx
*ctx
= q
->ctx
;
/* Drain pending messages. */
369 while (q
->msgs
!= NULL
) {
370 struct unix_dgram_msg
*msg
;
372 DLIST_REMOVE(q
->msgs
, msg
);
376 DLIST_REMOVE(ctx
->send_queues
, q
);
/*
 * Walk ctx->send_queues looking for the queue whose path equals dst_sock
 * (strcmp). The return statements are not visible in this view;
 * presumably the matching queue, or NULL when none matches -- confirm.
 */
380 static struct unix_dgram_send_queue
*find_send_queue(
381 struct unix_dgram_ctx
*ctx
, const char *dst_sock
)
383 struct unix_dgram_send_queue
*s
;
385 for (s
= ctx
->send_queues
; s
!= NULL
; s
= s
->next
) {
386 if (strcmp(s
->path
, dst_sock
) == 0) {
/*
 * Flatten the iovec array into one newly allocated unix_dgram_msg and
 * append it to the end of q->msgs.
 *
 * The total payload length comes from iov_buflen(); the allocation size
 * is offsetof(struct unix_dgram_msg, buf) plus that length and is checked
 * for wrap-around. Error handling and return statements are not visible
 * in this view.
 */
393 static int queue_msg(struct unix_dgram_send_queue
*q
,
394 const struct iovec
*iov
, int iovlen
)
396 struct unix_dgram_msg
*msg
;
401 buflen
= iov_buflen(iov
, iovlen
);
406 msglen
= offsetof(struct unix_dgram_msg
, buf
) + buflen
;
/* Overflow check on the addition above. */
407 if ((msglen
< buflen
) ||
408 (msglen
< offsetof(struct unix_dgram_msg
, buf
))) {
413 msg
= malloc(msglen
);
417 msg
->buflen
= buflen
;
/* NOTE(review): buflen is reused below as the running write offset, so it
 * must be reset to 0 before this loop; that reset is not visible in this
 * view -- confirm. */
421 for (i
=0; i
<iovlen
; i
++) {
422 memcpy(&msg
->buf
[buflen
], iov
[i
].iov_base
, iov
[i
].iov_len
);
423 buflen
+= iov
[i
].iov_len
;
/* Append at the tail so messages keep their order. */
426 DLIST_ADD_END(q
->msgs
, msg
, struct unix_dgram_msg
);
/*
 * Job function passed to pthreadpool_add_job(): sends msg->buf on
 * msg->sock with send(), retrying while it fails with EINTR. The result
 * of send() is stored in msg->sent.
 */
430 static void unix_dgram_send_job(void *private_data
)
432 struct unix_dgram_msg
*msg
= private_data
;
435 msg
->sent
= send(msg
->sock
, msg
->buf
, msg
->buflen
, 0);
436 } while ((msg
->sent
== -1) && (errno
== EINTR
));
/*
 * Poll callback for the thread pool's signal fd. Fetches one finished job
 * id, finds the send queue whose socket fd equals that id (jobs are added
 * with q->sock as their id), removes a message from that queue and either
 * schedules the next queued message or frees the queue.
 *
 * The assignment of "msg", the branch structure around the final
 * unix_dgram_send_queue_free() call and all error handling are on lines
 * not visible in this view.
 */
439 static void unix_dgram_job_finished(struct poll_watch
*w
, int fd
, short events
,
442 struct unix_dgram_ctx
*ctx
= private_data
;
443 struct unix_dgram_send_queue
*q
;
444 struct unix_dgram_msg
*msg
;
447 ret
= pthreadpool_finished_jobs(ctx
->send_pool
, &job
, 1);
/* Map the job id back to its queue. */
452 for (q
= ctx
->send_queues
; q
!= NULL
; q
= q
->next
) {
453 if (job
== q
->sock
) {
459 /* Huh? Should not happen */
464 DLIST_REMOVE(q
->msgs
, msg
);
/* More messages waiting for this destination: send the next one. */
467 if (q
->msgs
!= NULL
) {
468 ret
= pthreadpool_add_job(ctx
->send_pool
, q
->sock
,
469 unix_dgram_send_job
, q
->msgs
);
475 unix_dgram_send_queue_free(q
);
/*
 * Send the iovec array as one datagram to the socket "dst_sock".
 *
 * Visible flow: a destination name that does not fit into sun_path is
 * rejected; if a send queue for the destination exists the message is
 * queued behind the others; otherwise a direct sendmsg() on ctx->sock is
 * tried. When that fails with EWOULDBLOCK/EAGAIN/EINTR a new send queue is
 * created, the message is queued and a thread-pool job is started for it.
 *
 * Conditions around the individual steps and all return statements are on
 * lines not visible in this view.
 */
478 static int unix_dgram_send(struct unix_dgram_ctx
*ctx
, const char *dst_sock
,
479 const struct iovec
*iov
, int iovlen
)
481 struct unix_dgram_send_queue
*q
;
482 struct sockaddr_un addr
= { 0, };
/* dst_len excludes the NUL; ">=" leaves room for it in sun_path. */
487 dst_len
= strlen(dst_sock
);
488 if (dst_len
>= sizeof(addr
.sun_path
)) {
493 * To preserve message ordering, we have to queue a message when
494 * others are waiting in line already.
496 q
= find_send_queue(ctx
, dst_sock
);
498 return queue_msg(q
, iov
, iovlen
);
502 * Try a cheap nonblocking send
505 addr
.sun_family
= AF_UNIX
;
/* addr was zero-initialised, so sun_path stays NUL-terminated. */
506 memcpy(addr
.sun_path
, dst_sock
, dst_len
);
508 msg
.msg_name
= &addr
;
509 msg
.msg_namelen
= sizeof(addr
);
510 msg
.msg_iov
= discard_const_p(struct iovec
, iov
);
511 msg
.msg_iovlen
= iovlen
;
512 msg
.msg_control
= NULL
;
513 msg
.msg_controllen
= 0;
516 ret
= sendmsg(ctx
->sock
, &msg
, 0);
/* Two spellings of the "would block" test; the preprocessor conditional
 * selecting one of them is not visible in this view. */
521 if ((errno
!= EWOULDBLOCK
) && (errno
!= EAGAIN
) && (errno
!= EINTR
)) {
523 if ((errno
!= EAGAIN
) && (errno
!= EINTR
)) {
/* Slow path: create a queue for this destination and send from the pool. */
528 ret
= unix_dgram_send_queue_init(ctx
, dst_sock
, &q
);
532 ret
= queue_msg(q
, iov
, iovlen
);
534 unix_dgram_send_queue_free(q
);
537 ret
= pthreadpool_add_job(ctx
->send_pool
, q
->sock
,
538 unix_dgram_send_job
, q
->msgs
);
540 unix_dgram_send_queue_free(q
);
/* Accessor used by unix_msg_send() to fill hdr.sock. The body is not
 * visible in this view; presumably it returns ctx->sock -- confirm. */
546 static int unix_dgram_sock(struct unix_dgram_ctx
*ctx
)
/*
 * Release a unix_dgram_ctx. Visible steps: check for remaining send
 * queues, destroy the thread pool and free its watch if a pool exists,
 * free the socket read watch, and do pid-dependent cleanup of the socket
 * path.
 *
 * The body of the send_queues check, the actual unlink/close/free calls
 * and the return statements are not visible in this view.
 */
551 static int unix_dgram_free(struct unix_dgram_ctx
*ctx
)
/* Queues with pending messages still exist. */
553 if (ctx
->send_queues
!= NULL
) {
557 if (ctx
->send_pool
!= NULL
) {
558 int ret
= pthreadpool_destroy(ctx
->send_pool
);
562 ctx
->ev_funcs
->watch_free(ctx
->pool_read_watch
);
565 ctx
->ev_funcs
->watch_free(ctx
->sock_read_watch
);
/* created_pid is set by unix_dgram_init() after its bind() call. */
567 if (getpid() == ctx
->created_pid
) {
568 /* If we created it, unlink. Otherwise someone else might
569 * still have it open */
580 * Every message starts with a uint64_t cookie.
582 * A value of 0 indicates a single-fragment message which is complete in
583 * itself. The data immediately follows the cookie.
585 * Every multi-fragment message has a cookie != 0 and starts with a cookie
586 * followed by a struct unix_msg_hdr and then the data. The pid and sock
587 * fields are used to assure uniqueness on the receiver side.
/*
 * Header that follows the cookie in every fragment of a multi-fragment
 * message. Its members (msglen, pid, sock are used in this file) are
 * declared on lines not visible in this view.
 */
590 struct unix_msg_hdr
{
/* The prev/next pointers below are list links of struct unix_msg (a
 * message being reassembled); the opening line of that struct is not
 * visible in this view. */
597 struct unix_msg
*prev
, *next
;
/*
 * State of the fragmenting message layer on top of one unix_dgram_ctx.
 * Further members used in this file (fragment_len, cookie, private_data)
 * are declared on lines not visible in this view.
 */
606 struct unix_msg_ctx
{
/* Underlying datagram transport. */
607 struct unix_dgram_ctx
*dgram
;
/* Called with every completely received message; parameter list continues
 * on a line not visible here. */
611 void (*recv_callback
)(struct unix_msg_ctx
*ctx
,
612 uint8_t *msg
, size_t msg_len
,
/* Multi-fragment messages that are still being reassembled. */
616 struct unix_msg
*msgs
;
/* Forward declaration of the datagram receive callback that
 * unix_msg_init() passes to unix_dgram_init(); the parameter list
 * continues on a line not visible in this view. */
619 static void unix_msg_recv(struct unix_dgram_ctx
*ctx
,
620 uint8_t *msg
, size_t msg_len
,
/*
 * Create a unix_msg_ctx listening on the unix datagram socket "path".
 *
 * Allocates the context, creates the underlying unix_dgram_ctx with
 * fragment_len as its maximum datagram size and unix_msg_recv as its
 * receive callback, then stores fragment_len, the initial cookie, the
 * user's recv_callback and private_data.
 *
 * Error handling, the private_data parameter declaration, the assignment
 * to *result and the return statements are not visible in this view.
 */
623 int unix_msg_init(const char *path
, const struct poll_funcs
*ev_funcs
,
624 size_t fragment_len
, uint64_t cookie
,
625 void (*recv_callback
)(struct unix_msg_ctx
*ctx
,
626 uint8_t *msg
, size_t msg_len
,
629 struct unix_msg_ctx
**result
)
631 struct unix_msg_ctx
*ctx
;
634 ctx
= malloc(sizeof(*ctx
));
/* ctx itself is the private data handed back to unix_msg_recv. */
639 ret
= unix_dgram_init(path
, fragment_len
, ev_funcs
,
640 unix_msg_recv
, ctx
, &ctx
->dgram
);
646 ctx
->fragment_len
= fragment_len
;
647 ctx
->cookie
= cookie
;
648 ctx
->recv_callback
= recv_callback
;
649 ctx
->private_data
= private_data
;
/*
 * Send the iovec array as one message to "dst_sock", fragmenting it when
 * it does not fit into a single datagram.
 *
 * Fast path: when the payload plus a uint64_t cookie fits into
 * ctx->fragment_len, a local cookie is prepended and everything is sent
 * as one datagram.
 *
 * Otherwise every fragment carries ctx->cookie, a struct unix_msg_hdr and
 * as much payload as fits into ctx->fragment_len. The loop pulls chunks
 * from the caller's iovecs into iov_copy[2..].
 *
 * Initialisation of several locals, advancing to the next source iovec,
 * error handling and return statements are not visible in this view.
 */
656 int unix_msg_send(struct unix_msg_ctx
*ctx
, const char *dst_sock
,
657 const struct iovec
*iov
, int iovlen
)
662 struct iovec
*iov_copy
;
663 struct unix_msg_hdr hdr
;
664 struct iovec src_iov
;
670 msglen
= iov_buflen(iov
, iovlen
);
/* NOTE(review): the subtraction is unsigned; it would wrap if
 * fragment_len were smaller than sizeof(uint64_t) -- confirm callers
 * never pass such a value. */
675 if (msglen
<= (ctx
->fragment_len
- sizeof(uint64_t))) {
/* One extra slot in front for the cookie. */
676 struct iovec tmp_iov
[iovlen
+1];
679 tmp_iov
[0].iov_base
= &cookie
;
680 tmp_iov
[0].iov_len
= sizeof(cookie
);
682 memcpy(&tmp_iov
[1], iov
,
683 sizeof(struct iovec
) * iovlen
);
686 return unix_dgram_send(ctx
->dgram
, dst_sock
, tmp_iov
,
692 hdr
.sock
= unix_dgram_sock(ctx
->dgram
);
/* Two extra slots: cookie and header. */
694 iov_copy
= malloc(sizeof(struct iovec
) * (iovlen
+ 2));
695 if (iov_copy
== NULL
) {
698 iov_copy
[0].iov_base
= &ctx
->cookie
;
699 iov_copy
[0].iov_len
= sizeof(ctx
->cookie
);
700 iov_copy
[1].iov_base
= &hdr
;
701 iov_copy
[1].iov_len
= sizeof(hdr
);
707 * The following write loop sends the user message in pieces. We have
708 * filled the first two iovecs above with "cookie" and "hdr". In the
709 * following loops we pull message chunks from the user iov array and
710 * fill iov_copy piece by piece, possibly truncating chunks from the
711 * caller's iov array. Ugly, but hopefully efficient.
714 while (sent
< msglen
) {
/* Payload iovecs start behind cookie and header. */
716 size_t iov_index
= 2;
718 fragment_len
= sizeof(ctx
->cookie
) + sizeof(hdr
);
720 while (fragment_len
< ctx
->fragment_len
) {
723 space
= ctx
->fragment_len
- fragment_len
;
724 chunk
= MIN(space
, src_iov
.iov_len
);
726 iov_copy
[iov_index
].iov_base
= src_iov
.iov_base
;
727 iov_copy
[iov_index
].iov_len
= chunk
;
/* Consume the chunk from the current source iovec. */
730 src_iov
.iov_base
= (char *)src_iov
.iov_base
+ chunk
;
731 src_iov
.iov_len
-= chunk
;
732 fragment_len
+= chunk
;
734 if (src_iov
.iov_len
== 0) {
/* Count only payload bytes, not cookie and header. */
743 sent
+= (fragment_len
- sizeof(ctx
->cookie
) - sizeof(hdr
));
745 ret
= unix_dgram_send(ctx
->dgram
, dst_sock
,
746 iov_copy
, iov_index
);
/* A cookie of 0 marks single-fragment messages, so ctx->cookie gets
 * special handling here; the body of this branch is not visible. */
755 if (ctx
->cookie
== 0) {
/*
 * Datagram receive callback: reassembles fragments into messages and
 * passes complete messages to ctx->recv_callback.
 *
 * Visible flow: read the leading cookie; a complete single-fragment
 * payload is delivered directly (the condition guarding that call is not
 * visible). Otherwise read the unix_msg_hdr, look up an in-progress
 * message from the same sender (pid, sock), drop it from the list if its
 * cookie differs, allocate a new message of hdr.msglen bytes where
 * needed, append the fragment payload and deliver the message once
 * msg->received reaches msg->msglen.
 *
 * Several conditions, frees and return statements are on lines not
 * visible in this view.
 */
762 static void unix_msg_recv(struct unix_dgram_ctx
*dgram_ctx
,
763 uint8_t *buf
, size_t buflen
,
766 struct unix_msg_ctx
*ctx
= (struct unix_msg_ctx
*)private_data
;
767 struct unix_msg_hdr hdr
;
768 struct unix_msg
*msg
;
/* Too short to even hold the cookie. */
772 if (buflen
< sizeof(cookie
)) {
775 memcpy(&cookie
, buf
, sizeof(cookie
));
777 buf
+= sizeof(cookie
);
778 buflen
-= sizeof(cookie
);
781 ctx
->recv_callback(ctx
, buf
, buflen
, ctx
->private_data
);
/* Too short to hold the fragment header. */
785 if (buflen
< sizeof(hdr
)) {
788 memcpy(&hdr
, buf
, sizeof(hdr
));
791 buflen
-= sizeof(hdr
);
/* Find a partially received message from the same sender. */
793 for (msg
= ctx
->msgs
; msg
!= NULL
; msg
= msg
->next
) {
794 if ((msg
->sender_pid
== hdr
.pid
) &&
795 (msg
->sender_sock
== hdr
.sock
)) {
/* Same sender but different cookie: the old message is abandoned. */
800 if ((msg
!= NULL
) && (msg
->cookie
!= cookie
)) {
801 DLIST_REMOVE(ctx
->msgs
, msg
);
/* NOTE(review): hdr.msglen comes from the received datagram and is added
 * to the offsetof() value without a visible overflow check -- confirm the
 * sum cannot wrap before it reaches malloc(). */
807 msg
= malloc(offsetof(struct unix_msg
, buf
) + hdr
.msglen
);
811 msg
->msglen
= hdr
.msglen
;
813 msg
->sender_pid
= hdr
.pid
;
814 msg
->sender_sock
= hdr
.sock
;
815 msg
->cookie
= cookie
;
816 DLIST_ADD(ctx
->msgs
, msg
);
/* Bytes still missing from this message. */
819 space
= msg
->msglen
- msg
->received
;
820 if (buflen
> space
) {
824 memcpy(msg
->buf
+ msg
->received
, buf
, buflen
);
825 msg
->received
+= buflen
;
/* Not complete yet. */
827 if (msg
->received
< msg
->msglen
) {
/* Complete: unlink and deliver. */
831 DLIST_REMOVE(ctx
->msgs
, msg
);
832 ctx
->recv_callback(ctx
, msg
->buf
, msg
->msglen
, ctx
->private_data
);
/*
 * Release a unix_msg_ctx: frees the underlying unix_dgram_ctx first and
 * then removes every partially received message from ctx->msgs. The check
 * of "ret", the free() calls and the return statements are not visible in
 * this view.
 */
836 int unix_msg_free(struct unix_msg_ctx
*ctx
)
840 ret
= unix_dgram_free(ctx
->dgram
);
/* Drop messages that never completed. */
845 while (ctx
->msgs
!= NULL
) {
846 struct unix_msg
*msg
= ctx
->msgs
;
847 DLIST_REMOVE(ctx
->msgs
, msg
);
855 static ssize_t
iov_buflen(const struct iovec
*iov
, int iovlen
)
860 for (i
=0; i
<iovlen
; i
++) {
861 size_t thislen
= iov
[i
].iov_len
;
862 size_t tmp
= buflen
+ thislen
;
864 if ((tmp
< buflen
) || (tmp
< thislen
)) {