/*
   Copyright (C) Andrew Tridgell  2006
   Copyright (C) Ronnie Sahlberg  2008

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, see <http://www.gnu.org/licenses/>.
*/

#include "system/network.h"
#include "system/filesys.h"

#include <talloc.h>
#include <tevent.h>

#include "lib/util/debug.h"
#include "lib/util/time.h"
#include "lib/util/blocking.h"

#include "ctdb_private.h"

#include "common/system.h"
#include "common/common.h"
#include "common/logging.h"

41 stop any connecting (established or pending) to a node
43 void ctdb_tcp_stop_connection(struct ctdb_node
*node
)
45 struct ctdb_tcp_node
*tnode
= talloc_get_type(
46 node
->private_data
, struct ctdb_tcp_node
);
48 ctdb_queue_set_fd(tnode
->out_queue
, -1);
49 talloc_free(tnode
->connect_te
);
50 talloc_free(tnode
->connect_fde
);
51 tnode
->connect_fde
= NULL
;
52 tnode
->connect_te
= NULL
;
53 if (tnode
->fd
!= -1) {
61 called when a complete packet has come in - should not happen on this socket
62 unless the other side closes the connection with RST or FIN
64 void ctdb_tcp_tnode_cb(uint8_t *data
, size_t cnt
, void *private_data
)
66 struct ctdb_node
*node
= talloc_get_type(private_data
, struct ctdb_node
);
67 struct ctdb_tcp_node
*tnode
= talloc_get_type(
68 node
->private_data
, struct ctdb_tcp_node
);
71 node
->ctdb
->upcalls
->node_dead(node
);
74 ctdb_tcp_stop_connection(node
);
75 tnode
->connect_te
= tevent_add_timer(node
->ctdb
->ev
, tnode
,
76 timeval_current_ofs(3, 0),
77 ctdb_tcp_node_connect
, node
);
82 called when socket becomes writeable on connect
84 static void ctdb_node_connect_write(struct tevent_context
*ev
,
85 struct tevent_fd
*fde
,
86 uint16_t flags
, void *private_data
)
88 struct ctdb_node
*node
= talloc_get_type(private_data
,
90 struct ctdb_tcp_node
*tnode
= talloc_get_type(node
->private_data
,
91 struct ctdb_tcp_node
);
92 struct ctdb_context
*ctdb
= node
->ctdb
;
94 socklen_t len
= sizeof(error
);
97 talloc_free(tnode
->connect_te
);
98 tnode
->connect_te
= NULL
;
100 if (getsockopt(tnode
->fd
, SOL_SOCKET
, SO_ERROR
, &error
, &len
) != 0 ||
102 ctdb_tcp_stop_connection(node
);
103 tnode
->connect_te
= tevent_add_timer(ctdb
->ev
, tnode
,
104 timeval_current_ofs(1, 0),
105 ctdb_tcp_node_connect
, node
);
109 talloc_free(tnode
->connect_fde
);
110 tnode
->connect_fde
= NULL
;
112 if (setsockopt(tnode
->fd
,IPPROTO_TCP
,TCP_NODELAY
,(char *)&one
,sizeof(one
)) == -1) {
113 DEBUG(DEBUG_WARNING
, ("Failed to set TCP_NODELAY on fd - %s\n",
116 if (setsockopt(tnode
->fd
,SOL_SOCKET
,SO_KEEPALIVE
,(char *)&one
,sizeof(one
)) == -1) {
117 DEBUG(DEBUG_WARNING
, ("Failed to set KEEPALIVE on fd - %s\n",
121 ctdb_queue_set_fd(tnode
->out_queue
, tnode
->fd
);
123 /* the queue subsystem now owns this fd */
129 called when we should try and establish a tcp connection to a node
131 void ctdb_tcp_node_connect(struct tevent_context
*ev
, struct tevent_timer
*te
,
132 struct timeval t
, void *private_data
)
134 struct ctdb_node
*node
= talloc_get_type(private_data
,
136 struct ctdb_tcp_node
*tnode
= talloc_get_type(node
->private_data
,
137 struct ctdb_tcp_node
);
138 struct ctdb_context
*ctdb
= node
->ctdb
;
139 ctdb_sock_addr sock_in
;
142 ctdb_sock_addr sock_out
;
145 ctdb_tcp_stop_connection(node
);
147 sock_out
= node
->address
;
149 tnode
->fd
= socket(sock_out
.sa
.sa_family
, SOCK_STREAM
, IPPROTO_TCP
);
150 if (tnode
->fd
== -1) {
151 DEBUG(DEBUG_ERR
, (__location__
" Failed to create socket\n"));
155 ret
= set_blocking(tnode
->fd
, false);
159 " failed to set socket non-blocking (%s)\n",
166 set_close_on_exec(tnode
->fd
);
168 DEBUG(DEBUG_DEBUG
, (__location__
" Created TCP SOCKET FD:%d\n", tnode
->fd
));
170 /* Bind our side of the socketpair to the same address we use to listen
171 * on incoming CTDB traffic.
172 * We must specify this address to make sure that the address we expose to
173 * the remote side is actually routable in case CTDB traffic will run on
174 * a dedicated non-routeable network.
176 sock_in
= *ctdb
->address
;
178 /* AIX libs check to see if the socket address and length
179 arguments are consistent with each other on calls like
180 connect(). Can not get by with just sizeof(sock_in),
181 need sizeof(sock_in.ip).
183 switch (sock_in
.sa
.sa_family
) {
185 sock_in
.ip
.sin_port
= 0 /* Any port */;
186 sockin_size
= sizeof(sock_in
.ip
);
187 sockout_size
= sizeof(sock_out
.ip
);
190 sock_in
.ip6
.sin6_port
= 0 /* Any port */;
191 sockin_size
= sizeof(sock_in
.ip6
);
192 sockout_size
= sizeof(sock_out
.ip6
);
195 DEBUG(DEBUG_ERR
, (__location__
" unknown family %u\n",
196 sock_in
.sa
.sa_family
));
202 if (bind(tnode
->fd
, (struct sockaddr
*)&sock_in
, sockin_size
) == -1) {
203 DEBUG(DEBUG_ERR
, (__location__
" Failed to bind socket %s(%d)\n",
204 strerror(errno
), errno
));
210 if (connect(tnode
->fd
, (struct sockaddr
*)&sock_out
, sockout_size
) != 0 &&
211 errno
!= EINPROGRESS
) {
212 ctdb_tcp_stop_connection(node
);
213 tnode
->connect_te
= tevent_add_timer(ctdb
->ev
, tnode
,
214 timeval_current_ofs(1, 0),
215 ctdb_tcp_node_connect
, node
);
219 /* non-blocking connect - wait for write event */
220 tnode
->connect_fde
= tevent_add_fd(node
->ctdb
->ev
, tnode
, tnode
->fd
,
221 TEVENT_FD_WRITE
|TEVENT_FD_READ
,
222 ctdb_node_connect_write
, node
);
224 /* don't give it long to connect - retry in one second. This ensures
225 that we find a node is up quickly (tcp normally backs off a syn reply
226 delay by quite a lot) */
227 tnode
->connect_te
= tevent_add_timer(ctdb
->ev
, tnode
,
228 timeval_current_ofs(1, 0),
229 ctdb_tcp_node_connect
, node
);
233 called when we get contacted by another node
234 currently makes no attempt to check if the connection is really from a ctdb
237 static void ctdb_listen_event(struct tevent_context
*ev
, struct tevent_fd
*fde
,
238 uint16_t flags
, void *private_data
)
240 struct ctdb_context
*ctdb
= talloc_get_type(private_data
, struct ctdb_context
);
241 struct ctdb_tcp
*ctcp
= talloc_get_type(ctdb
->private_data
, struct ctdb_tcp
);
245 struct ctdb_incoming
*in
;
249 memset(&addr
, 0, sizeof(addr
));
251 fd
= accept(ctcp
->listen_fd
, (struct sockaddr
*)&addr
, &len
);
252 if (fd
== -1) return;
253 smb_set_close_on_exec(fd
);
255 nodeid
= ctdb_ip_to_nodeid(ctdb
, &addr
);
258 DEBUG(DEBUG_ERR
, ("Refused connection from unknown node %s\n", ctdb_addr_to_str(&addr
)));
263 in
= talloc_zero(ctcp
, struct ctdb_incoming
);
267 ret
= set_blocking(in
->fd
, false);
271 " failed to set socket non-blocking (%s)\n",
278 set_close_on_exec(in
->fd
);
280 DEBUG(DEBUG_DEBUG
, (__location__
" Created SOCKET FD:%d to incoming ctdb connection\n", fd
));
282 if (setsockopt(in
->fd
,SOL_SOCKET
,SO_KEEPALIVE
,(char *)&one
,sizeof(one
)) == -1) {
283 DEBUG(DEBUG_WARNING
, ("Failed to set KEEPALIVE on fd - %s\n",
287 in
->queue
= ctdb_queue_setup(ctdb
, in
, in
->fd
, CTDB_TCP_ALIGNMENT
,
288 ctdb_tcp_read_cb
, in
, "ctdbd-%s", ctdb_addr_to_str(&addr
));
293 automatically find which address to listen on
295 static int ctdb_tcp_listen_automatic(struct ctdb_context
*ctdb
)
297 struct ctdb_tcp
*ctcp
= talloc_get_type(ctdb
->private_data
,
301 const char *lock_path
= CTDB_RUNDIR
"/.socket_lock";
305 struct tevent_fd
*fde
;
307 /* If there are no nodes, then it won't be possible to find
308 * the first one. Log a failure and short circuit the whole
311 if (ctdb
->num_nodes
== 0) {
312 DEBUG(DEBUG_CRIT
,("No nodes available to attempt bind to - is the nodes file empty?\n"));
316 /* in order to ensure that we don't get two nodes with the
317 same adddress, we must make the bind() and listen() calls
318 atomic. The SO_REUSEADDR setsockopt only prevents double
319 binds if the first socket is in LISTEN state */
320 lock_fd
= open(lock_path
, O_RDWR
|O_CREAT
, 0666);
322 DEBUG(DEBUG_CRIT
,("Unable to open %s\n", lock_path
));
326 lock
.l_type
= F_WRLCK
;
327 lock
.l_whence
= SEEK_SET
;
332 if (fcntl(lock_fd
, F_SETLKW
, &lock
) != 0) {
333 DEBUG(DEBUG_CRIT
,("Unable to lock %s\n", lock_path
));
338 for (i
=0; i
< ctdb
->num_nodes
; i
++) {
339 if (ctdb
->nodes
[i
]->flags
& NODE_FLAGS_DELETED
) {
342 sock
= ctdb
->nodes
[i
]->address
;
344 switch (sock
.sa
.sa_family
) {
346 sock_size
= sizeof(sock
.ip
);
349 sock_size
= sizeof(sock
.ip6
);
352 DEBUG(DEBUG_ERR
, (__location__
" unknown family %u\n",
357 ctcp
->listen_fd
= socket(sock
.sa
.sa_family
, SOCK_STREAM
, IPPROTO_TCP
);
358 if (ctcp
->listen_fd
== -1) {
359 ctdb_set_error(ctdb
, "socket failed\n");
363 set_close_on_exec(ctcp
->listen_fd
);
365 if (setsockopt(ctcp
->listen_fd
,SOL_SOCKET
,SO_REUSEADDR
,
366 (char *)&one
,sizeof(one
)) == -1) {
367 DEBUG(DEBUG_WARNING
, ("Failed to set REUSEADDR on fd - %s\n",
371 if (bind(ctcp
->listen_fd
, (struct sockaddr
* )&sock
, sock_size
) == 0) {
375 if (errno
== EADDRNOTAVAIL
) {
376 DEBUG(DEBUG_DEBUG
,(__location__
" Failed to bind() to socket. %s(%d)\n",
377 strerror(errno
), errno
));
379 DEBUG(DEBUG_ERR
,(__location__
" Failed to bind() to socket. %s(%d)\n",
380 strerror(errno
), errno
));
383 close(ctcp
->listen_fd
);
384 ctcp
->listen_fd
= -1;
387 if (i
== ctdb
->num_nodes
) {
388 DEBUG(DEBUG_CRIT
,("Unable to bind to any of the node addresses - giving up\n"));
391 ctdb
->address
= talloc_memdup(ctdb
,
392 &ctdb
->nodes
[i
]->address
,
393 sizeof(ctdb_sock_addr
));
394 if (ctdb
->address
== NULL
) {
395 ctdb_set_error(ctdb
, "Out of memory at %s:%d",
400 ctdb
->name
= talloc_asprintf(ctdb
, "%s:%u",
401 ctdb_addr_to_str(ctdb
->address
),
402 ctdb_addr_to_port(ctdb
->address
));
403 if (ctdb
->name
== NULL
) {
404 ctdb_set_error(ctdb
, "Out of memory at %s:%d",
408 DEBUG(DEBUG_INFO
,("ctdb chose network address %s\n", ctdb
->name
));
410 if (listen(ctcp
->listen_fd
, 10) == -1) {
414 fde
= tevent_add_fd(ctdb
->ev
, ctcp
, ctcp
->listen_fd
, TEVENT_FD_READ
,
415 ctdb_listen_event
, ctdb
);
416 tevent_fd_set_auto_close(fde
);
424 if (ctcp
->listen_fd
!= -1) {
425 close(ctcp
->listen_fd
);
426 ctcp
->listen_fd
= -1;
433 listen on our own address
435 int ctdb_tcp_listen(struct ctdb_context
*ctdb
)
437 struct ctdb_tcp
*ctcp
= talloc_get_type(ctdb
->private_data
,
442 struct tevent_fd
*fde
;
444 /* we can either auto-bind to the first available address, or we can
445 use a specified address */
446 if (!ctdb
->address
) {
447 return ctdb_tcp_listen_automatic(ctdb
);
450 sock
= *ctdb
->address
;
452 switch (sock
.sa
.sa_family
) {
454 sock_size
= sizeof(sock
.ip
);
457 sock_size
= sizeof(sock
.ip6
);
460 DEBUG(DEBUG_ERR
, (__location__
" unknown family %u\n",
465 ctcp
->listen_fd
= socket(sock
.sa
.sa_family
, SOCK_STREAM
, IPPROTO_TCP
);
466 if (ctcp
->listen_fd
== -1) {
467 ctdb_set_error(ctdb
, "socket failed\n");
471 set_close_on_exec(ctcp
->listen_fd
);
473 if (setsockopt(ctcp
->listen_fd
,SOL_SOCKET
,SO_REUSEADDR
,(char *)&one
,sizeof(one
)) == -1) {
474 DEBUG(DEBUG_WARNING
, ("Failed to set REUSEADDR on fd - %s\n",
478 if (bind(ctcp
->listen_fd
, (struct sockaddr
* )&sock
, sock_size
) != 0) {
479 DEBUG(DEBUG_ERR
,(__location__
" Failed to bind() to socket. %s(%d)\n", strerror(errno
), errno
));
483 if (listen(ctcp
->listen_fd
, 10) == -1) {
487 fde
= tevent_add_fd(ctdb
->ev
, ctcp
, ctcp
->listen_fd
, TEVENT_FD_READ
,
488 ctdb_listen_event
, ctdb
);
489 tevent_fd_set_auto_close(fde
);
494 if (ctcp
->listen_fd
!= -1) {
495 close(ctcp
->listen_fd
);
497 ctcp
->listen_fd
= -1;