ctdb-tools: Rework killtcp logic into a tevent_req-based computation
[Samba.git] / ctdb / tools / ctdb_killtcp.c
blob1917652ee66165a96407c4c9768fce4ad99b7084
/*
   CTDB TCP connection killing utility

   Copyright (C) Martin Schwenke <martin@meltin.net> 2016

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
20 #include <talloc.h>
21 #include <tevent.h>
23 #include "replace.h"
24 #include "system/network.h"
26 #include "lib/util/debug.h"
27 #include "lib/util/tevent_unix.h"
29 #include "protocol/protocol.h"
30 #include "protocol/protocol_util.h"
32 #include "common/db_hash.h"
33 #include "common/system.h"
34 #include "common/logging.h"
/*
 * State carried by one "reset connections" tevent request
 */
struct reset_connections_state {
	/* Event context used to schedule the periodic batch timer */
	struct tevent_context *ev;

	/* Capture socket and the fd event that watches it for reads */
	int capture_fd;
	struct tevent_fd *fde;

	/* Connections still waiting to be reset; each one is stored as a key */
	struct db_hash_context *connections;

	/* Opaque value filled in when the capture socket is opened */
	void *private_data;

	/* Batches run so far, and the number after which we give up */
	unsigned int attempts;
	unsigned int max_attempts;

	/* Delay between successive batches */
	struct timeval retry_interval;

	/* Connections visited in the current batch, and the per-batch cap */
	unsigned int batch_count;
	unsigned int batch_size;
};
/* Read handler for the capture socket */
static void reset_connections_capture_tcp_handler(struct tevent_context *ev,
						  struct tevent_fd *fde,
						  uint16_t flags,
						  void *private_data);

/* Timer callback that sends one batch of tickle ACKs */
static void reset_connections_batch(struct tevent_req *subreq);

/* Hash traverse callback that tickles a single connection */
static int reset_connections_tickle_connection(
	uint8_t *keybuf, size_t keylen,
	uint8_t *databuf, size_t datalen,
	void *private_data);
61 static struct tevent_req *reset_connections_send(
62 TALLOC_CTX *mem_ctx,
63 struct tevent_context *ev,
64 const char *iface,
65 struct ctdb_connection_list *conn_list)
67 struct tevent_req *req, *subreq;
68 struct reset_connections_state *state;
69 int i, ret;
71 req = tevent_req_create(mem_ctx, &state,
72 struct reset_connections_state);
73 if (req == NULL) {
74 return NULL;
77 state->ev = ev;
79 ret = db_hash_init(state, "connections", 2048, DB_HASH_SIMPLE,
80 &state->connections);
81 if (ret != 0) {
82 D_ERR("Failed to initialise connection hash (%s)\n",
83 strerror(ret));
84 tevent_req_error(req, ret);
85 return tevent_req_post(req, ev);
88 for (i = 0; i < conn_list->num; i++) {
89 struct ctdb_connection *c = &conn_list->conn[i];
91 /* Connection is stored as a key in the connections hash */
92 ret = db_hash_add(state->connections,
93 (uint8_t *)discard_const(c), sizeof(*c),
94 NULL, 0);
95 if (ret != 0) {
96 D_ERR("Error adding connection to hash (%s)\n",
97 strerror(ret));
98 tevent_req_error(req, ret);
99 return tevent_req_post(req, ev);
103 state->attempts = 0;
104 state->max_attempts = 50;
106 state->retry_interval.tv_sec = 0;
107 state->retry_interval.tv_usec = 100 * 1000;
109 state->batch_count = 0;
110 state->batch_size = 300;
112 state->capture_fd =
113 ctdb_sys_open_capture_socket(iface, &state->private_data);
114 if (state->capture_fd == -1) {
115 D_ERR("Failed to open capture socket on iface '%s' (%s)\n",
116 iface, strerror(errno));
117 tevent_req_error(req, EIO);
118 return tevent_req_post(req, ev);
121 state->fde = tevent_add_fd(ev, state, state->capture_fd,
122 TEVENT_FD_READ,
123 reset_connections_capture_tcp_handler,
124 state);
125 if (tevent_req_nomem(state->fde, req)) {
126 return tevent_req_post(req, ev);
128 tevent_fd_set_auto_close(state->fde);
130 subreq = tevent_wakeup_send(state, ev, tevent_timeval_current_ofs(0,0));
131 if (tevent_req_nomem(subreq, req)) {
132 return tevent_req_post(req, ev);
134 tevent_req_set_callback(subreq, reset_connections_batch, req);
136 return req;
140 called when we get a read event on the raw socket
142 static void reset_connections_capture_tcp_handler(struct tevent_context *ev,
143 struct tevent_fd *fde,
144 uint16_t flags,
145 void *private_data)
147 struct reset_connections_state *state = talloc_get_type_abort(
148 private_data, struct reset_connections_state);
149 /* 0 the parts that don't get set by ctdb_sys_read_tcp_packet */
150 struct ctdb_connection conn;
151 uint32_t ack_seq, seq;
152 int rst;
153 uint16_t window;
154 int ret;
156 ret = ctdb_sys_read_tcp_packet(state->capture_fd,
157 state->private_data,
158 &conn.server, &conn.client,
159 &ack_seq, &seq, &rst, &window);
160 if (ret != 0) {
161 /* probably a non-tcp ACK packet */
162 return;
165 if (window == htons(1234) && (rst || seq == 0)) {
166 /* Ignore packets that we sent! */
167 D_DEBUG("Ignoring packet: %s, "
168 "seq=%"PRIu32", ack_seq=%"PRIu32", "
169 "rst=%d, window=%"PRIu16"\n",
170 ctdb_connection_to_string(state, &conn, false),
171 seq, ack_seq, rst, ntohs(window));
172 return;
175 /* Check if this connection is one being reset, if found then delete */
176 ret = db_hash_delete(state->connections,
177 (uint8_t*)&conn, sizeof(conn));
178 if (ret == ENOENT) {
179 /* Packet for some other connection, ignore */
180 return;
182 if (ret != 0) {
183 DBG_WARNING("Internal error (%s)\n", strerror(ret));
184 return;
187 D_INFO("Sending a TCP RST to for connection %s\n",
188 ctdb_connection_to_string(state, &conn, true));
190 ret = ctdb_sys_send_tcp(&conn.server, &conn.client, ack_seq, seq, 1);
191 if (ret != 0) {
192 DBG_ERR("Error sending TCP RST for connection\n");
197 * Called periodically until all sentenced connections have been reset
198 * or enough attempts have been made
200 static void reset_connections_batch(struct tevent_req *subreq)
202 struct tevent_req *req = tevent_req_callback_data(
203 subreq, struct tevent_req);
204 struct reset_connections_state *state = tevent_req_data(
205 req, struct reset_connections_state);
206 bool status;
207 int count, ret;
209 status = tevent_wakeup_recv(subreq);
210 TALLOC_FREE(subreq);
212 if (! status) {
213 DBG_WARNING("Unexpected error on timer expiry\n");
214 /* Keep going... */
217 /* loop over up to batch_size connections sending tickle ACKs */
218 state->batch_count = 0;
219 ret = db_hash_traverse(state->connections,
220 reset_connections_tickle_connection,
221 state, NULL);
222 if (ret != 0) {
223 DBG_WARNING("Unexpected error traversing connections (%s)\n",
224 strerror(ret));
227 state->attempts++;
230 * If there are no more connections to kill or we have tried
231 * too many times we're finished
233 ret = db_hash_traverse(state->connections, NULL, NULL, &count);
234 if (ret != 0) {
235 /* What now? Try again until max_attempts reached */
236 DBG_WARNING("Unexpected error traversing connections (%s)\n",
237 strerror(ret));
238 count = 1;
240 if (count == 0 ||
241 state->attempts >= state->max_attempts) {
242 tevent_req_done(req);
243 return;
246 /* Schedule next attempt */
247 subreq = tevent_wakeup_send(state, state->ev,
248 tevent_timeval_current_ofs(
249 state->retry_interval.tv_sec,
250 state->retry_interval.tv_usec));
251 if (tevent_req_nomem(subreq, req)) {
252 return;
254 tevent_req_set_callback(subreq, reset_connections_batch, req);
257 static int reset_connections_tickle_connection(
258 uint8_t *keybuf, size_t keylen,
259 uint8_t *databuf, size_t datalen,
260 void *private_data)
262 struct reset_connections_state *state = talloc_get_type_abort(
263 private_data, struct reset_connections_state);
264 struct ctdb_connection *conn;
265 int ret;
267 if (keylen != sizeof(*conn)) {
268 DBG_WARNING("Unexpected data in connection hash\n");
269 return 0;
272 conn = (struct ctdb_connection *)keybuf;
274 state->batch_count++;
275 if (state->batch_count > state->batch_size) {
276 /* Terminate the traverse */
277 return 1;
280 ret = ctdb_sys_send_tcp(&conn->server, &conn->client, 0, 0, 0);
281 if (ret != 0) {
282 DBG_ERR("Error sending tickle ACK\n");
283 /* continue */
286 return 0;
/*
 * Collect the result of a reset_connections request.
 *
 * Returns true if the request finished without error.  Otherwise
 * returns false and, when perr is not NULL, stores the error code
 * in *perr.
 */
static bool reset_connections_recv(struct tevent_req *req, int *perr)
{
	int unix_err;
	bool failed;

	failed = tevent_req_is_unix_error(req, &unix_err);
	if (! failed) {
		return true;
	}

	if (perr != NULL) {
		*perr = unix_err;
	}
	return false;
}
/*
 * Print a usage message to stdout and exit with status 1.
 * Never returns.
 */
static void usage(const char *prog)
{
	fprintf(stdout,
		"usage: %s <interface> [ <srcip:port> <dstip:port> ]\n",
		prog);
	exit(1);
}
309 int main(int argc, char **argv)
311 struct ctdb_connection conn;
312 struct tevent_context *ev = NULL;
313 struct TALLOC_CONTEXT *mem_ctx = NULL;
314 struct ctdb_connection_list *conn_list = NULL;
315 const char *t;
316 struct tevent_req *req;
317 int debug_level;
318 bool status;
319 int ret;
321 /* Set the debug level */
322 t = getenv("CTDB_DEBUGLEVEL");
323 if (t != NULL) {
324 if (debug_level_parse(t, &debug_level)) {
325 DEBUGLEVEL = debug_level;
326 } else {
327 DEBUGLEVEL = DEBUG_ERR;
331 if (argc != 2 && argc != 4) {
332 usage(argv[0]);
335 if (argc == 4) {
336 ret = ctdb_sock_addr_from_string(argv[2], &conn.client, true);
337 if (ret != 0) {
338 D_ERR("Bad IP:port '%s'\n", argv[2]);
339 goto fail;
342 ret = ctdb_sock_addr_from_string(argv[3], &conn.server, true);
343 if (ret != 0) {
344 D_ERR("Bad IP:port '%s'\n", argv[3]);
345 goto fail;
349 conn_list = talloc_zero(mem_ctx, struct ctdb_connection_list);
350 if (conn_list == NULL) {
351 ret = ENOMEM;
352 DBG_ERR("Internal error (%s)\n", strerror(ret));
353 goto fail;
355 ret = ctdb_connection_list_add(conn_list, &conn);
356 if (ret != 0) {
357 DBG_ERR("Internal error (%s)\n", strerror(ret));
358 goto fail;
360 } else {
361 ret = ctdb_connection_list_read(mem_ctx, true, &conn_list);
362 if (ret != 0) {
363 D_ERR("Unable to parse connections (%s)\n",
364 strerror(ret));
365 goto fail;
369 mem_ctx = talloc_new(NULL);
370 if (mem_ctx == NULL) {
371 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
372 goto fail;
375 ev = tevent_context_init(mem_ctx);
376 if (ev == NULL) {
377 DEBUG(DEBUG_ERR, ("Failed to initialise tevent\n"));
378 goto fail;
381 if (conn_list->num == 0) {
382 /* No connections, done! */
383 talloc_free(mem_ctx);
384 return 0;
387 req = reset_connections_send(mem_ctx, ev, argv[1], conn_list);
388 if (req == NULL) {
389 goto fail;
392 tevent_req_poll(req, ev);
394 status = reset_connections_recv(req, &ret);
395 if (! status) {
396 D_ERR("Failed to kill connections (%s)\n", strerror(ret));
397 goto fail;
400 talloc_free(mem_ctx);
402 return 0;
404 fail:
405 TALLOC_FREE(mem_ctx);
406 return -1;