ctdb-tests: Provide a method to dump the stack on abort
[samba.git] / ctdb / server / ctdb_daemon.c
blobedd7d57532b6af82980e11230db02435cfee3541
1 /*
2 ctdb daemon code
4 Copyright (C) Andrew Tridgell 2006
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, see <http://www.gnu.org/licenses/>.
20 #include "replace.h"
21 #include "system/network.h"
22 #include "system/filesys.h"
23 #include "system/wait.h"
24 #include "system/time.h"
26 #include <talloc.h>
27 /* Allow use of deprecated function tevent_loop_allow_nesting() */
28 #define TEVENT_DEPRECATED
29 #include <tevent.h>
30 #include <tdb.h>
32 #include "lib/tdb_wrap/tdb_wrap.h"
33 #include "lib/util/dlinklist.h"
34 #include "lib/util/debug.h"
35 #include "lib/util/time.h"
36 #include "lib/util/blocking.h"
37 #include "lib/util/become_daemon.h"
39 #include "version.h"
40 #include "ctdb_private.h"
41 #include "ctdb_client.h"
43 #include "common/rb_tree.h"
44 #include "common/reqid.h"
45 #include "common/system.h"
46 #include "common/common.h"
47 #include "common/logging.h"
48 #include "common/pidfile.h"
49 #include "common/sock_io.h"
/*
 * Linked-list node mapping a connected client process to its
 * connection state.  The list head lives in ctdb->client_pids.
 */
struct ctdb_client_pid_list {
	struct ctdb_client_pid_list *next, *prev;
	struct ctdb_context *ctdb;	/* owning daemon context */
	pid_t pid;			/* peer PID of the connected client */
	struct ctdb_client *client;	/* the client connection itself */
};
/* Path of the daemon pid file (set by startup code) and its pidfile
 * context, which removes the file when freed at exit. */
const char *ctdbd_pidfile = NULL;
static struct pidfile_context *ctdbd_pidfile_ctx = NULL;

static void daemon_incoming_packet(void *, struct ctdb_req_header *);

/* PID of the main daemon, cached so forked children can detect that
 * they are not the daemon (see print_exit_message()).
 * NOTE(review): the leading "__" makes this a reserved identifier in
 * standard C — kept for compatibility with existing code. */
static pid_t __ctdbd_pid;
65 static void print_exit_message(void)
67 if (getpid() == __ctdbd_pid) {
68 DEBUG(DEBUG_NOTICE,("CTDB daemon shutting down\n"));
70 /* Wait a second to allow pending log messages to be flushed */
71 sleep(1);
75 #ifdef HAVE_GETRUSAGE
/*
 * Snapshot of CPU accounting, used to compute utilisation between two
 * consecutive periodic checks.
 */
struct cpu_check_threshold_data {
	unsigned short percent;		/* utilisation at sample time, in percent */
	struct timeval timeofday;	/* wall-clock time of the sample */
	struct timeval ru_time;		/* summed user+system CPU time */
};
83 static void ctdb_cpu_check_threshold(struct tevent_context *ev,
84 struct tevent_timer *te,
85 struct timeval tv,
86 void *private_data)
88 struct ctdb_context *ctdb = talloc_get_type_abort(
89 private_data, struct ctdb_context);
90 uint32_t interval = 60;
92 static unsigned short threshold = 0;
93 static struct cpu_check_threshold_data prev = {
94 .percent = 0,
95 .timeofday = { .tv_sec = 0 },
96 .ru_time = { .tv_sec = 0 },
99 struct rusage usage;
100 struct cpu_check_threshold_data curr = {
101 .percent = 0,
103 int64_t ru_time_diff, timeofday_diff;
104 bool first;
105 int ret;
108 * Cache the threshold so that we don't waste time checking
109 * the environment variable every time
111 if (threshold == 0) {
112 const char *t;
114 threshold = 90;
116 t = getenv("CTDB_TEST_CPU_USAGE_THRESHOLD");
117 if (t != NULL) {
118 int th;
120 th = atoi(t);
121 if (th <= 0 || th > 100) {
122 DBG_WARNING("Failed to parse env var: %s\n", t);
123 } else {
124 threshold = th;
129 ret = getrusage(RUSAGE_SELF, &usage);
130 if (ret != 0) {
131 DBG_WARNING("rusage() failed: %d\n", ret);
132 goto next;
135 /* Sum the system and user CPU usage */
136 curr.ru_time = timeval_sum(&usage.ru_utime, &usage.ru_stime);
138 curr.timeofday = tv;
140 first = timeval_is_zero(&prev.timeofday);
141 if (first) {
142 /* No previous values recorded so no calculation to do */
143 goto done;
146 timeofday_diff = usec_time_diff(&curr.timeofday, &prev.timeofday);
147 if (timeofday_diff <= 0) {
149 * Time went backwards or didn't progress so no (sane)
150 * calculation can be done
152 goto done;
155 ru_time_diff = usec_time_diff(&curr.ru_time, &prev.ru_time);
157 curr.percent = ru_time_diff * 100 / timeofday_diff;
159 if (curr.percent >= threshold) {
160 /* Log only if the utilisation changes */
161 if (curr.percent != prev.percent) {
162 D_WARNING("WARNING: CPU utilisation %hu%% >= "
163 "threshold (%hu%%)\n",
164 curr.percent,
165 threshold);
167 } else {
168 /* Log if the utilisation falls below the threshold */
169 if (prev.percent >= threshold) {
170 D_WARNING("WARNING: CPU utilisation %hu%% < "
171 "threshold (%hu%%)\n",
172 curr.percent,
173 threshold);
177 done:
178 prev = curr;
180 next:
181 tevent_add_timer(ctdb->ev, ctdb,
182 timeval_current_ofs(interval, 0),
183 ctdb_cpu_check_threshold,
184 ctdb);
/* Schedule the first CPU utilisation check immediately; the handler
 * then reschedules itself periodically. */
static void ctdb_start_cpu_check_threshold(struct ctdb_context *ctdb)
{
	tevent_add_timer(ctdb->ev, ctdb,
			 timeval_current(),
			 ctdb_cpu_check_threshold,
			 ctdb);
}
194 #endif /* HAVE_GETRUSAGE */
/* Dummy once-per-second timer event; see ctdb_start_time_tickd().
 * Stops rescheduling itself if running in a forked child (our PID no
 * longer matches the daemon's). */
static void ctdb_time_tick(struct tevent_context *ev, struct tevent_timer *te,
			   struct timeval t, void *private_data)
{
	struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);

	if (getpid() != ctdb->ctdbd_pid) {
		return;
	}

	tevent_add_timer(ctdb->ev, ctdb,
			 timeval_current_ofs(1, 0),
			 ctdb_time_tick, ctdb);
}
/* Used to trigger a dummy event once per second, to make
 * detection of hangs more reliable.
 */
static void ctdb_start_time_tickd(struct ctdb_context *ctdb)
{
	tevent_add_timer(ctdb->ev, ctdb,
			 timeval_current_ofs(1, 0),
			 ctdb_time_tick, ctdb);
}
/* Start all of the daemon's recurring background activities. */
static void ctdb_start_periodic_events(struct ctdb_context *ctdb)
{
	/* start monitoring for connected/disconnected nodes */
	ctdb_start_keepalive(ctdb);

	/* start periodic update of tcp tickle lists */
	ctdb_start_tcp_tickle_update(ctdb);

	/* start listening for recovery daemon pings */
	ctdb_control_recd_ping(ctdb);

	/* start listening to timer ticks */
	ctdb_start_time_tickd(ctdb);

#ifdef HAVE_GETRUSAGE
	ctdb_start_cpu_check_threshold(ctdb);
#endif /* HAVE_GETRUSAGE */
}
239 static void ignore_signal(int signum)
241 struct sigaction act;
243 memset(&act, 0, sizeof(act));
245 act.sa_handler = SIG_IGN;
246 sigemptyset(&act.sa_mask);
247 sigaddset(&act.sa_mask, signum);
248 sigaction(signum, &act, NULL);
/*
  send a packet to a client
 */
static int daemon_queue_send(struct ctdb_client *client, struct ctdb_req_header *hdr)
{
	CTDB_INCREMENT_STAT(client->ctdb, client_packets_sent);
	if (hdr->operation == CTDB_REQ_MESSAGE) {
		if (ctdb_queue_length(client->queue) > client->ctdb->tunable.max_queue_depth_drop_msg) {
			DEBUG(DEBUG_ERR,("CTDB_REQ_MESSAGE queue full - killing client connection.\n"));
			/* NOTE: this frees the whole client connection;
			 * after a -1 return callers must not touch client */
			talloc_free(client);
			return -1;
		}
	}
	return ctdb_queue_send(client->queue, (uint8_t *)hdr, hdr->length);
}
269 message handler for when we are in daemon mode. This redirects the message
270 to the right client
272 static void daemon_message_handler(uint64_t srvid, TDB_DATA data,
273 void *private_data)
275 struct ctdb_client *client = talloc_get_type(private_data, struct ctdb_client);
276 struct ctdb_req_message_old *r;
277 int len;
279 /* construct a message to send to the client containing the data */
280 len = offsetof(struct ctdb_req_message_old, data) + data.dsize;
281 r = ctdbd_allocate_pkt(client->ctdb, client->ctdb, CTDB_REQ_MESSAGE,
282 len, struct ctdb_req_message_old);
283 CTDB_NO_MEMORY_VOID(client->ctdb, r);
285 talloc_set_name_const(r, "req_message packet");
287 r->srvid = srvid;
288 r->datalen = data.dsize;
289 memcpy(&r->data[0], data.dptr, data.dsize);
291 daemon_queue_send(client, &r->hdr);
293 talloc_free(r);
297 this is called when the ctdb daemon received a ctdb request to
298 set the srvid from the client
300 int daemon_register_message_handler(struct ctdb_context *ctdb, uint32_t client_id, uint64_t srvid)
302 struct ctdb_client *client = reqid_find(ctdb->idr, client_id, struct ctdb_client);
303 int res;
304 if (client == NULL) {
305 DEBUG(DEBUG_ERR,("Bad client_id in daemon_request_register_message_handler\n"));
306 return -1;
308 res = srvid_register(ctdb->srv, client, srvid, daemon_message_handler,
309 client);
310 if (res != 0) {
311 DEBUG(DEBUG_ERR,(__location__ " Failed to register handler %llu in daemon\n",
312 (unsigned long long)srvid));
313 } else {
314 DEBUG(DEBUG_INFO,(__location__ " Registered message handler for srvid=%llu\n",
315 (unsigned long long)srvid));
318 return res;
322 this is called when the ctdb daemon received a ctdb request to
323 remove a srvid from the client
325 int daemon_deregister_message_handler(struct ctdb_context *ctdb, uint32_t client_id, uint64_t srvid)
327 struct ctdb_client *client = reqid_find(ctdb->idr, client_id, struct ctdb_client);
328 if (client == NULL) {
329 DEBUG(DEBUG_ERR,("Bad client_id in daemon_request_deregister_message_handler\n"));
330 return -1;
332 return srvid_deregister(ctdb->srv, srvid, client);
/*
 * Forward a tunnel packet received by the daemon to the local client
 * that registered the tunnel_id.  The incoming packet is re-wrapped in
 * a fresh CTDB_REQ_TUNNEL packet owned by the daemon context.
 */
void daemon_tunnel_handler(uint64_t tunnel_id, TDB_DATA data,
			   void *private_data)
{
	struct ctdb_client *client =
		talloc_get_type_abort(private_data, struct ctdb_client);
	struct ctdb_req_tunnel_old *c, *pkt;
	size_t len;

	pkt = (struct ctdb_req_tunnel_old *)data.dptr;

	len = offsetof(struct ctdb_req_tunnel_old, data) + pkt->datalen;
	c = ctdbd_allocate_pkt(client->ctdb, client->ctdb, CTDB_REQ_TUNNEL,
			       len, struct ctdb_req_tunnel_old);
	if (c == NULL) {
		DEBUG(DEBUG_ERR, ("Memory error in daemon_tunnel_handler\n"));
		return;
	}

	talloc_set_name_const(c, "req_tunnel packet");

	c->tunnel_id = tunnel_id;
	c->flags = pkt->flags;
	c->datalen = pkt->datalen;
	memcpy(c->data, pkt->data, pkt->datalen);

	daemon_queue_send(client, &c->hdr);

	talloc_free(c);
}
/*
  destroy a ctdb_client
 */
static int ctdb_client_destructor(struct ctdb_client *client)
{
	struct ctdb_db_context *ctdb_db;

	ctdb_takeover_client_destructor_hook(client);
	reqid_remove(client->ctdb->idr, client->client_id);
	client->ctdb->num_clients--;

	if (client->num_persistent_updates != 0) {
		/* a client died with persistent updates outstanding: force
		 * a recovery to bring the databases back to consistency */
		DEBUG(DEBUG_ERR,(__location__ " Client disconnecting with %u persistent updates in flight. Starting recovery\n", client->num_persistent_updates));
		client->ctdb->recovery_mode = CTDB_RECOVERY_ACTIVE;
	}
	ctdb_db = find_ctdb_db(client->ctdb, client->db_id);
	if (ctdb_db) {
		DEBUG(DEBUG_ERR, (__location__ " client exit while transaction "
				  "commit active. Forcing recovery.\n"));
		client->ctdb->recovery_mode = CTDB_RECOVERY_ACTIVE;

		/*
		 * trans3 transaction state: free the in-flight commit
		 * state; its destructor sets the pointer to NULL.
		 * (NOTE(review): part of the original comment was lost
		 * in extraction — confirm wording against upstream.)
		 */
		talloc_free(ctdb_db->persistent_state);
	}

	return 0;
}
/*
  this is called when the ctdb daemon received a ctdb request message
  from a local client over the unix domain socket
 */
static void daemon_request_message_from_client(struct ctdb_client *client,
					       struct ctdb_req_message_old *c)
{
	TDB_DATA data;
	int res;

	/* CTDB_CURRENT_NODE is resolved to our own PNN before routing */
	if (c->hdr.destnode == CTDB_CURRENT_NODE) {
		c->hdr.destnode = ctdb_get_pnn(client->ctdb);
	}

	/* maybe the message is for another client on this node */
	if (ctdb_get_pnn(client->ctdb)==c->hdr.destnode) {
		ctdb_request_message(client->ctdb, (struct ctdb_req_header *)c);
		return;
	}

	/* its for a remote node */
	data.dptr = &c->data[0];
	data.dsize = c->datalen;
	res = ctdb_daemon_send_message(client->ctdb, c->hdr.destnode,
				       c->srvid, data);
	if (res != 0) {
		DEBUG(DEBUG_ERR,(__location__ " Failed to send message to remote node %u\n",
				 c->hdr.destnode));
	}
}
/* Per-call bookkeeping kept while a client's CTDB_REQ_CALL is in
 * flight; completed in daemon_call_from_client_callback(). */
struct daemon_call_state {
	struct ctdb_client *client;	/* requesting client */
	uint32_t reqid;			/* client's request id, echoed in reply */
	struct ctdb_call *call;
	struct timeval start_time;	/* for latency accounting */

	/* readonly request ? */
	uint32_t readonly_fetch;	/* non-zero if remapped to FETCH_WITH_HEADER */
	uint32_t client_callid;		/* call id as originally requested */
};
/*
  complete a call from a client
 */
static void daemon_call_from_client_callback(struct ctdb_call_state *state)
{
	struct daemon_call_state *dstate = talloc_get_type(state->async.private_data,
							   struct daemon_call_state);
	struct ctdb_reply_call_old *r;
	int res;
	uint32_t length;
	struct ctdb_client *client = dstate->client;
	struct ctdb_db_context *ctdb_db = state->ctdb_db;

	/* reparent so dstate (and the call) outlive the freeing of state */
	talloc_steal(client, dstate);
	talloc_steal(dstate, dstate->call);

	res = ctdb_daemon_call_recv(state, dstate->call);
	if (res != 0) {
		DEBUG(DEBUG_ERR, (__location__ " ctdbd_call_recv() returned error\n"));
		CTDB_DECREMENT_STAT(client->ctdb, pending_calls);

		CTDB_UPDATE_LATENCY(client->ctdb, ctdb_db, "call_from_client_cb 1", call_latency, dstate->start_time);
		return;
	}

	length = offsetof(struct ctdb_reply_call_old, data) + dstate->call->reply_data.dsize;
	/* If the client asked for readonly FETCH, we remapped this to
	   FETCH_WITH_HEADER when calling the daemon. So we must
	   strip the extra header off the reply data before passing
	   it back to the client.
	 */
	if (dstate->readonly_fetch
	    && dstate->client_callid == CTDB_FETCH_FUNC) {
		length -= sizeof(struct ctdb_ltdb_header);
	}

	r = ctdbd_allocate_pkt(client->ctdb, dstate, CTDB_REPLY_CALL,
			       length, struct ctdb_reply_call_old);
	if (r == NULL) {
		DEBUG(DEBUG_ERR, (__location__ " Failed to allocate reply_call in ctdb daemon\n"));
		CTDB_DECREMENT_STAT(client->ctdb, pending_calls);
		CTDB_UPDATE_LATENCY(client->ctdb, ctdb_db, "call_from_client_cb 2", call_latency, dstate->start_time);
		return;
	}
	r->hdr.reqid = dstate->reqid;
	r->status = dstate->call->status;

	if (dstate->readonly_fetch
	    && dstate->client_callid == CTDB_FETCH_FUNC) {
		/* client only asked for a FETCH so we must strip off
		   the extra ctdb_ltdb header
		 */
		r->datalen = dstate->call->reply_data.dsize - sizeof(struct ctdb_ltdb_header);
		memcpy(&r->data[0], dstate->call->reply_data.dptr + sizeof(struct ctdb_ltdb_header), r->datalen);
	} else {
		r->datalen = dstate->call->reply_data.dsize;
		memcpy(&r->data[0], dstate->call->reply_data.dptr, r->datalen);
	}

	res = daemon_queue_send(client, &r->hdr);
	if (res == -1) {
		/* client is dead - return immediately */
		return;
	}
	if (res != 0) {
		DEBUG(DEBUG_ERR, (__location__ " Failed to queue packet from daemon to client\n"));
	}
	CTDB_UPDATE_LATENCY(client->ctdb, ctdb_db, "call_from_client_cb 3", call_latency, dstate->start_time);
	CTDB_DECREMENT_STAT(client->ctdb, pending_calls);
	talloc_free(dstate);
}
/* Minimal handle used to re-find a client by id after a deferred
 * operation, in case the client disconnected in the meantime. */
struct ctdb_daemon_packet_wrap {
	struct ctdb_context *ctdb;
	uint32_t client_id;
};
/*
  a wrapper to catch disconnected clients
 */
static void daemon_incoming_packet_wrap(void *p, struct ctdb_req_header *hdr)
{
	struct ctdb_client *client;
	struct ctdb_daemon_packet_wrap *w = talloc_get_type(p,
							    struct ctdb_daemon_packet_wrap);
	if (w == NULL) {
		DEBUG(DEBUG_CRIT,(__location__ " Bad packet type '%s'\n", talloc_get_name(p)));
		return;
	}

	/* the client may have gone away while the packet was deferred */
	client = reqid_find(w->ctdb->idr, w->client_id, struct ctdb_client);
	if (client == NULL) {
		DEBUG(DEBUG_ERR,(__location__ " Packet for disconnected client %u\n",
				 w->client_id));
		talloc_free(w);
		return;
	}
	talloc_free(w);

	/* process it */
	daemon_incoming_packet(client, hdr);
}
/* One deferred duplicate fetch request, queued until the in-flight
 * fetch for the same key completes. */
struct ctdb_deferred_fetch_call {
	struct ctdb_deferred_fetch_call *next, *prev;
	struct ctdb_req_call_old *c;		/* the deferred request packet */
	struct ctdb_daemon_packet_wrap *w;	/* identifies the requesting client */
};

/* Per-key queue of deferred fetches; keyed in ctdb_db->deferred_fetch. */
struct ctdb_deferred_fetch_queue {
	struct ctdb_deferred_fetch_call *deferred_calls;
};

/* Context handed to the requeue timer that replays one deferred call. */
struct ctdb_deferred_requeue {
	struct ctdb_deferred_fetch_call *dfc;
	struct ctdb_client *client;
};
/* called from a timer event and starts reprocessing the deferred call.*/
static void reprocess_deferred_call(struct tevent_context *ev,
				    struct tevent_timer *te,
				    struct timeval t, void *private_data)
{
	struct ctdb_deferred_requeue *dfr = (struct ctdb_deferred_requeue *)private_data;
	struct ctdb_client *client = dfr->client;

	/* give the packet back to the client before re-dispatching it */
	talloc_steal(client, dfr->dfc->c);
	daemon_incoming_packet(client, (struct ctdb_req_header *)dfr->dfc->c);
	talloc_free(dfr);
}
/* the referral context is destroyed either after a timeout or when the initial
   fetch-lock has finished.
   at this stage, immediately start reprocessing the queued up deferred
   calls so they get reprocessed immediately (and since we are dmaster at
   this stage, trigger the waiting smbd processes to pick up and acquire the
   record right away.
 */
static int deferred_fetch_queue_destructor(struct ctdb_deferred_fetch_queue *dfq)
{
	/* need to reprocess the packets from the queue explicitly instead of
	   just using a normal destructor since we need to
	   call the clients in the same order as the requests queued up
	 */
	while (dfq->deferred_calls != NULL) {
		struct ctdb_client *client;
		struct ctdb_deferred_fetch_call *dfc = dfq->deferred_calls;
		struct ctdb_deferred_requeue *dfr;

		DLIST_REMOVE(dfq->deferred_calls, dfc);

		/* client may have disconnected while the call was deferred */
		client = reqid_find(dfc->w->ctdb->idr, dfc->w->client_id, struct ctdb_client);
		if (client == NULL) {
			DEBUG(DEBUG_ERR,(__location__ " Packet for disconnected client %u\n",
					 dfc->w->client_id));
			continue;
		}

		/* process it by pushing it back onto the eventloop */
		dfr = talloc(client, struct ctdb_deferred_requeue);
		if (dfr == NULL) {
			DEBUG(DEBUG_ERR,("Failed to allocate deferred fetch requeue structure\n"));
			continue;
		}

		dfr->dfc = talloc_steal(dfr, dfc);
		dfr->client = client;

		tevent_add_timer(dfc->w->ctdb->ev, client, timeval_zero(),
				 reprocess_deferred_call, dfr);
	}

	return 0;
}
/* insert the new deferral context into the rb tree.
   there should never be a pre-existing context here, but check for it
   warn and destroy the previous context if there is already a deferral context
   for this key.
 */
static void *insert_dfq_callback(void *parm, void *data)
{
	if (data) {
		DEBUG(DEBUG_ERR,("Already have DFQ registered. Free old %p and create new %p\n", data, parm));
		talloc_free(data);
	}
	return parm;
}
/* if the original fetch-lock did not complete within a reasonable time,
   free the context and context for all deferred requests to cause them to be
   re-inserted into the event system.
 */
static void dfq_timeout(struct tevent_context *ev, struct tevent_timer *te,
			struct timeval t, void *private_data)
{
	/* freeing the dfq fires deferred_fetch_queue_destructor() which
	 * replays all queued calls */
	talloc_free(private_data);
}
/* This function is used in the local daemon to register a KEY in a database
   for being "fetched"
   While the remote fetch is in-flight, any futher attempts to re-fetch the
   same record will be deferred until the fetch completes.
 */
static int setup_deferred_fetch_locks(struct ctdb_db_context *ctdb_db, struct ctdb_call *call)
{
	uint32_t *k;
	struct ctdb_deferred_fetch_queue *dfq;

	k = ctdb_key_to_idkey(call, call->key);
	if (k == NULL) {
		DEBUG(DEBUG_ERR,("Failed to allocate key for deferred fetch\n"));
		return -1;
	}

	/* dfq is parented on the call so it is torn down (replaying the
	 * queue) when the in-flight fetch completes */
	dfq = talloc(call, struct ctdb_deferred_fetch_queue);
	if (dfq == NULL) {
		DEBUG(DEBUG_ERR,("Failed to allocate key for deferred fetch queue structure\n"));
		talloc_free(k);
		return -1;
	}
	dfq->deferred_calls = NULL;

	trbt_insertarray32_callback(ctdb_db->deferred_fetch, k[0], &k[0], insert_dfq_callback, dfq);

	talloc_set_destructor(dfq, deferred_fetch_queue_destructor);

	/* if the fetch havent completed in 30 seconds, just tear it all down
	   and let it try again as the events are reissued */
	tevent_add_timer(ctdb_db->ctdb->ev, dfq, timeval_current_ofs(30, 0),
			 dfq_timeout, dfq);

	talloc_free(k);
	return 0;
}
/* check if this is a duplicate request to a fetch already in-flight
   if it is, make this call deferred to be reprocessed later when
   the in-flight fetch completes.
   Returns 0 if the call was deferred, -1 if it must be processed now.
 */
static int requeue_duplicate_fetch(struct ctdb_db_context *ctdb_db, struct ctdb_client *client, TDB_DATA key, struct ctdb_req_call_old *c)
{
	uint32_t *k;
	struct ctdb_deferred_fetch_queue *dfq;
	struct ctdb_deferred_fetch_call *dfc;

	k = ctdb_key_to_idkey(c, key);
	if (k == NULL) {
		DEBUG(DEBUG_ERR,("Failed to allocate key for deferred fetch\n"));
		return -1;
	}

	/* no queue for this key means no fetch in flight: not a duplicate */
	dfq = trbt_lookuparray32(ctdb_db->deferred_fetch, k[0], &k[0]);
	if (dfq == NULL) {
		talloc_free(k);
		return -1;
	}

	talloc_free(k);

	dfc = talloc(dfq, struct ctdb_deferred_fetch_call);
	if (dfc == NULL) {
		DEBUG(DEBUG_ERR, ("Failed to allocate deferred fetch call structure\n"));
		return -1;
	}

	dfc->w = talloc(dfc, struct ctdb_daemon_packet_wrap);
	if (dfc->w == NULL) {
		DEBUG(DEBUG_ERR,("Failed to allocate deferred fetch daemon packet wrap structure\n"));
		talloc_free(dfc);
		return -1;
	}

	dfc->c = talloc_steal(dfc, c);
	dfc->w->ctdb = ctdb_db->ctdb;
	dfc->w->client_id = client->client_id;

	DLIST_ADD_END(dfq->deferred_calls, dfc);

	return 0;
}
/*
  this is called when the ctdb daemon received a ctdb request call
  from a local client over the unix domain socket
 */
static void daemon_request_call_from_client(struct ctdb_client *client,
					    struct ctdb_req_call_old *c)
{
	struct ctdb_call_state *state;
	struct ctdb_db_context *ctdb_db;
	struct daemon_call_state *dstate;
	struct ctdb_call *call;
	struct ctdb_ltdb_header header;
	TDB_DATA key, data;
	int ret;
	struct ctdb_context *ctdb = client->ctdb;
	struct ctdb_daemon_packet_wrap *w;

	CTDB_INCREMENT_STAT(ctdb, total_calls);
	CTDB_INCREMENT_STAT(ctdb, pending_calls);

	ctdb_db = find_ctdb_db(client->ctdb, c->db_id);
	if (!ctdb_db) {
		DEBUG(DEBUG_ERR, (__location__ " Unknown database in request. db_id==0x%08x",
				  c->db_id));
		CTDB_DECREMENT_STAT(ctdb, pending_calls);
		return;
	}

	if (ctdb_db->unhealthy_reason) {
		/*
		 * this is just a warning, as the tdb should be empty anyway,
		 * and only persistent databases can be unhealthy, which doesn't
		 * use this code patch
		 */
		DEBUG(DEBUG_WARNING,("warn: db(%s) unhealty in daemon_request_call_from_client(): %s\n",
				     ctdb_db->db_name, ctdb_db->unhealthy_reason));
	}

	key.dptr = c->data;
	key.dsize = c->keylen;

	/* w lets the deferred-requeue path re-find the client by id in
	 * case it disconnects while the lock is pending */
	w = talloc(ctdb, struct ctdb_daemon_packet_wrap);
	CTDB_NO_MEMORY_VOID(ctdb, w);

	w->ctdb = ctdb;
	w->client_id = client->client_id;

	ret = ctdb_ltdb_lock_fetch_requeue(ctdb_db, key, &header,
					   (struct ctdb_req_header *)c, &data,
					   daemon_incoming_packet_wrap, w, true);
	if (ret == -2) {
		/* will retry later */
		CTDB_DECREMENT_STAT(ctdb, pending_calls);
		return;
	}

	talloc_free(w);

	if (ret != 0) {
		DEBUG(DEBUG_ERR,(__location__ " Unable to fetch record\n"));
		CTDB_DECREMENT_STAT(ctdb, pending_calls);
		return;
	}

	/* check if this fetch request is a duplicate for a
	   request we already have in flight. If so defer it until
	   the first request completes.
	 */
	if (ctdb->tunable.fetch_collapse == 1) {
		if (requeue_duplicate_fetch(ctdb_db, client, key, c) == 0) {
			ret = ctdb_ltdb_unlock(ctdb_db, key);
			if (ret != 0) {
				DEBUG(DEBUG_ERR,(__location__ " ctdb_ltdb_unlock() failed with error %d\n", ret));
			}
			CTDB_DECREMENT_STAT(ctdb, pending_calls);
			talloc_free(data.dptr);
			return;
		}
	}

	/* Dont do READONLY if we don't have a tracking database */
	if ((c->flags & CTDB_WANT_READONLY) && !ctdb_db_readonly(ctdb_db)) {
		c->flags &= ~CTDB_WANT_READONLY;
	}

	/* a completed revoke: clear the read-only flags and drop the
	 * tracking record before proceeding with the call */
	if (header.flags & CTDB_REC_RO_REVOKE_COMPLETE) {
		header.flags &= ~CTDB_REC_RO_FLAGS;
		CTDB_INCREMENT_STAT(ctdb, total_ro_revokes);
		CTDB_INCREMENT_DB_STAT(ctdb_db, db_ro_revokes);
		if (ctdb_ltdb_store(ctdb_db, key, &header, data) != 0) {
			ctdb_fatal(ctdb, "Failed to write header with cleared REVOKE flag");
		}
		/* and clear out the tracking data */
		if (tdb_delete(ctdb_db->rottdb, key) != 0) {
			DEBUG(DEBUG_ERR,(__location__ " Failed to clear out trackingdb record\n"));
		}
	}

	/* if we are revoking, we must defer all other calls until the revoke
	 * had completed.
	 */
	if (header.flags & CTDB_REC_RO_REVOKING_READONLY) {
		talloc_free(data.dptr);
		ret = ctdb_ltdb_unlock(ctdb_db, key);

		if (ctdb_add_revoke_deferred_call(ctdb, ctdb_db, key, (struct ctdb_req_header *)c, daemon_incoming_packet, client) != 0) {
			ctdb_fatal(ctdb, "Failed to add deferred call for revoke child");
		}
		CTDB_DECREMENT_STAT(ctdb, pending_calls);
		return;
	}

	/* we are dmaster, the client wants a writable copy, and read-only
	 * delegations exist: start a revoke and defer this call */
	if ((header.dmaster == ctdb->pnn)
	    && (!(c->flags & CTDB_WANT_READONLY))
	    && (header.flags & (CTDB_REC_RO_HAVE_DELEGATIONS|CTDB_REC_RO_HAVE_READONLY)) ) {
		header.flags |= CTDB_REC_RO_REVOKING_READONLY;
		if (ctdb_ltdb_store(ctdb_db, key, &header, data) != 0) {
			ctdb_fatal(ctdb, "Failed to store record with HAVE_DELEGATIONS set");
		}
		ret = ctdb_ltdb_unlock(ctdb_db, key);

		if (ctdb_start_revoke_ro_record(ctdb, ctdb_db, key, &header, data) != 0) {
			ctdb_fatal(ctdb, "Failed to start record revoke");
		}
		talloc_free(data.dptr);

		if (ctdb_add_revoke_deferred_call(ctdb, ctdb_db, key, (struct ctdb_req_header *)c, daemon_incoming_packet, client) != 0) {
			ctdb_fatal(ctdb, "Failed to add deferred call for revoke child");
		}

		CTDB_DECREMENT_STAT(ctdb, pending_calls);
		return;
	}

	dstate = talloc(client, struct daemon_call_state);
	if (dstate == NULL) {
		ret = ctdb_ltdb_unlock(ctdb_db, key);
		if (ret != 0) {
			DEBUG(DEBUG_ERR,(__location__ " ctdb_ltdb_unlock() failed with error %d\n", ret));
		}

		DEBUG(DEBUG_ERR,(__location__ " Unable to allocate dstate\n"));
		CTDB_DECREMENT_STAT(ctdb, pending_calls);
		return;
	}
	dstate->start_time = timeval_current();
	dstate->client = client;
	dstate->reqid = c->hdr.reqid;
	talloc_steal(dstate, data.dptr);

	call = dstate->call = talloc_zero(dstate, struct ctdb_call);
	if (call == NULL) {
		ret = ctdb_ltdb_unlock(ctdb_db, key);
		if (ret != 0) {
			DEBUG(DEBUG_ERR,(__location__ " ctdb_ltdb_unlock() failed with error %d\n", ret));
		}

		DEBUG(DEBUG_ERR,(__location__ " Unable to allocate call\n"));
		CTDB_DECREMENT_STAT(ctdb, pending_calls);
		CTDB_UPDATE_LATENCY(ctdb, ctdb_db, "call_from_client 1", call_latency, dstate->start_time);
		return;
	}

	dstate->readonly_fetch = 0;
	call->call_id = c->callid;
	call->key = key;
	call->call_data.dptr = c->data + c->keylen;
	call->call_data.dsize = c->calldatalen;
	call->flags = c->flags;

	if (c->flags & CTDB_WANT_READONLY) {
		/* client wants readonly record, so translate this into a
		   fetch with header. remember what the client asked for
		   so we can remap the reply back to the proper format for
		   the client in the reply
		 */
		dstate->client_callid = call->call_id;
		call->call_id = CTDB_FETCH_WITH_HEADER_FUNC;
		dstate->readonly_fetch = 1;
	}

	if (header.dmaster == ctdb->pnn) {
		state = ctdb_call_local_send(ctdb_db, call, &header, &data);
	} else {
		state = ctdb_daemon_call_send_remote(ctdb_db, call, &header);
		if (ctdb->tunable.fetch_collapse == 1) {
			/* This request triggered a remote fetch-lock.
			   set up a deferral for this key so any additional
			   fetch-locks are deferred until the current one
			   finishes.
			 */
			setup_deferred_fetch_locks(ctdb_db, call);
		}
	}

	ret = ctdb_ltdb_unlock(ctdb_db, key);
	if (ret != 0) {
		DEBUG(DEBUG_ERR,(__location__ " ctdb_ltdb_unlock() failed with error %d\n", ret));
	}

	if (state == NULL) {
		DEBUG(DEBUG_ERR,(__location__ " Unable to setup call send\n"));
		CTDB_DECREMENT_STAT(ctdb, pending_calls);
		CTDB_UPDATE_LATENCY(ctdb, ctdb_db, "call_from_client 2", call_latency, dstate->start_time);
		return;
	}
	talloc_steal(state, dstate);
	talloc_steal(client, state);

	state->async.fn = daemon_call_from_client_callback;
	state->async.private_data = dstate;
}
/* Forward declarations: definitions appear later in this file. */
static void daemon_request_control_from_client(struct ctdb_client *client,
					       struct ctdb_req_control_old *c);
static void daemon_request_tunnel_from_client(struct ctdb_client *client,
					      struct ctdb_req_tunnel_old *c);
/* data contains a packet from the client */
static void daemon_incoming_packet(void *p, struct ctdb_req_header *hdr)
{
	struct ctdb_client *client = talloc_get_type(p, struct ctdb_client);
	TALLOC_CTX *tmp_ctx;
	struct ctdb_context *ctdb = client->ctdb;

	/* place the packet as a child of a tmp_ctx. We then use
	   talloc_free() below to free it. If any of the calls want
	   to keep it, then they will steal it somewhere else, and the
	   talloc_free() will be a no-op */
	tmp_ctx = talloc_new(client);
	talloc_steal(tmp_ctx, hdr);

	if (hdr->ctdb_magic != CTDB_MAGIC) {
		ctdb_set_error(client->ctdb, "Non CTDB packet rejected in daemon\n");
		goto done;
	}

	if (hdr->ctdb_version != CTDB_PROTOCOL) {
		ctdb_set_error(client->ctdb, "Bad CTDB version 0x%x rejected in daemon\n", hdr->ctdb_version);
		goto done;
	}

	/* dispatch on packet type */
	switch (hdr->operation) {
	case CTDB_REQ_CALL:
		CTDB_INCREMENT_STAT(ctdb, client.req_call);
		daemon_request_call_from_client(client, (struct ctdb_req_call_old *)hdr);
		break;

	case CTDB_REQ_MESSAGE:
		CTDB_INCREMENT_STAT(ctdb, client.req_message);
		daemon_request_message_from_client(client, (struct ctdb_req_message_old *)hdr);
		break;

	case CTDB_REQ_CONTROL:
		CTDB_INCREMENT_STAT(ctdb, client.req_control);
		daemon_request_control_from_client(client, (struct ctdb_req_control_old *)hdr);
		break;

	case CTDB_REQ_TUNNEL:
		CTDB_INCREMENT_STAT(ctdb, client.req_tunnel);
		daemon_request_tunnel_from_client(client, (struct ctdb_req_tunnel_old *)hdr);
		break;

	default:
		DEBUG(DEBUG_CRIT,(__location__ " daemon: unrecognized operation %u\n",
				  hdr->operation));
	}

done:
	talloc_free(tmp_ctx);
}
1001 called when the daemon gets a incoming packet
1003 static void ctdb_daemon_read_cb(uint8_t *data, size_t cnt, void *args)
1005 struct ctdb_client *client = talloc_get_type(args, struct ctdb_client);
1006 struct ctdb_req_header *hdr;
1008 if (cnt == 0) {
1009 talloc_free(client);
1010 return;
1013 CTDB_INCREMENT_STAT(client->ctdb, client_packets_recv);
1015 if (cnt < sizeof(*hdr)) {
1016 ctdb_set_error(client->ctdb, "Bad packet length %u in daemon\n",
1017 (unsigned)cnt);
1018 return;
1020 hdr = (struct ctdb_req_header *)data;
1022 if (hdr->ctdb_magic != CTDB_MAGIC) {
1023 ctdb_set_error(client->ctdb, "Non CTDB packet rejected\n");
1024 goto err_out;
1027 if (hdr->ctdb_version != CTDB_PROTOCOL) {
1028 ctdb_set_error(client->ctdb, "Bad CTDB version 0x%x rejected in daemon\n", hdr->ctdb_version);
1029 goto err_out;
1032 DEBUG(DEBUG_DEBUG,(__location__ " client request %u of type %u length %u from "
1033 "node %u to %u\n", hdr->reqid, hdr->operation, hdr->length,
1034 hdr->srcnode, hdr->destnode));
1036 /* it is the responsibility of the incoming packet function to free 'data' */
1037 daemon_incoming_packet(client, hdr);
1038 return;
1040 err_out:
1041 TALLOC_FREE(data);
/* Destructor: unlink a client pid entry from the daemon's list. */
static int ctdb_clientpid_destructor(struct ctdb_client_pid_list *client_pid)
{
	if (client_pid->ctdb->client_pids != NULL) {
		DLIST_REMOVE(client_pid->ctdb->client_pids, client_pid);
	}

	return 0;
}
/*
 * Allocate a new client id for a connecting client.  Guarantees that
 * the returned id is neither 0 nor REQID_INVALID.  Returns 0 on
 * success and EINVAL on failure, storing the id in *out.
 */
static int get_new_client_id(struct reqid_context *idr,
			     struct ctdb_client *client,
			     uint32_t *out)
{
	uint32_t client_id;

	client_id = reqid_new(idr, client);
	/*
	 * Some places in the code (e.g. ctdb_control_db_attach(),
	 * ctdb_control_db_detach()) assign a special meaning to
	 * client_id 0.  The assumption is that if client_id is 0 then
	 * the control has come from another daemon.  Therefore, we
	 * should never return client_id == 0.
	 */
	if (client_id == 0) {
		/*
		 * Don't leak ID 0.  This is safe because the ID keeps
		 * increasing.  A test will be added to ensure that
		 * this doesn't change.
		 */
		reqid_remove(idr, 0);

		client_id = reqid_new(idr, client);
	}

	if (client_id == REQID_INVALID) {
		return EINVAL;
	}

	if (client_id == 0) {
		/* Every other ID must have been used and we can't use 0 */
		reqid_remove(idr, 0);
		return EINVAL;
	}

	*out = client_id;
	return 0;
}
1093 static void ctdb_accept_client(struct tevent_context *ev,
1094 struct tevent_fd *fde, uint16_t flags,
1095 void *private_data)
1097 struct sockaddr_un addr;
1098 socklen_t len;
1099 int fd;
1100 struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
1101 struct ctdb_client *client;
1102 struct ctdb_client_pid_list *client_pid;
1103 pid_t peer_pid = 0;
1104 int ret;
1106 memset(&addr, 0, sizeof(addr));
1107 len = sizeof(addr);
1108 fd = accept(ctdb->daemon.sd, (struct sockaddr *)&addr, &len);
1109 if (fd == -1) {
1110 return;
1112 smb_set_close_on_exec(fd);
1114 ret = set_blocking(fd, false);
1115 if (ret != 0) {
1116 DEBUG(DEBUG_ERR,
1117 (__location__
1118 " failed to set socket non-blocking (%s)\n",
1119 strerror(errno)));
1120 close(fd);
1121 return;
1124 set_close_on_exec(fd);
1126 DEBUG(DEBUG_DEBUG,(__location__ " Created SOCKET FD:%d to connected child\n", fd));
1128 client = talloc_zero(ctdb, struct ctdb_client);
1129 if (ctdb_get_peer_pid(fd, &peer_pid) == 0) {
1130 DEBUG(DEBUG_INFO,("Connected client with pid:%u\n", (unsigned)peer_pid));
1133 client->ctdb = ctdb;
1134 client->fd = fd;
1136 ret = get_new_client_id(ctdb->idr, client, &client->client_id);
1137 if (ret != 0) {
1138 DBG_ERR("Unable to get client ID (%d)\n", ret);
1139 close(fd);
1140 talloc_free(client);
1141 return;
1144 client->pid = peer_pid;
1146 client_pid = talloc(client, struct ctdb_client_pid_list);
1147 if (client_pid == NULL) {
1148 DEBUG(DEBUG_ERR,("Failed to allocate client pid structure\n"));
1149 close(fd);
1150 talloc_free(client);
1151 return;
1153 client_pid->ctdb = ctdb;
1154 client_pid->pid = peer_pid;
1155 client_pid->client = client;
1157 DLIST_ADD(ctdb->client_pids, client_pid);
1159 client->queue = ctdb_queue_setup(ctdb, client, fd, CTDB_DS_ALIGNMENT,
1160 ctdb_daemon_read_cb, client,
1161 "client-%u", client->pid);
1163 talloc_set_destructor(client, ctdb_client_destructor);
1164 talloc_set_destructor(client_pid, ctdb_clientpid_destructor);
1165 ctdb->num_clients++;
1171 * Create a unix domain socket, bind it, secure it and listen. Return
1172 * the file descriptor for the socket.
1174 static int ux_socket_bind(struct ctdb_context *ctdb, bool test_mode_enabled)
1176 struct sockaddr_un addr = { .sun_family = AF_UNIX };
1177 int ret;
1179 ctdb->daemon.sd = socket(AF_UNIX, SOCK_STREAM, 0);
1180 if (ctdb->daemon.sd == -1) {
1181 return -1;
1184 strncpy(addr.sun_path, ctdb->daemon.name, sizeof(addr.sun_path)-1);
1186 if (! sock_clean(ctdb->daemon.name)) {
1187 return -1;
1190 set_close_on_exec(ctdb->daemon.sd);
1192 ret = set_blocking(ctdb->daemon.sd, false);
1193 if (ret != 0) {
1194 DBG_ERR("Failed to set socket non-blocking (%s)\n",
1195 strerror(errno));
1196 goto failed;
1199 ret = bind(ctdb->daemon.sd, (struct sockaddr *)&addr, sizeof(addr));
1200 if (ret == -1) {
1201 D_ERR("Unable to bind on ctdb socket '%s'\n", ctdb->daemon.name);
1202 goto failed;
1205 if (!test_mode_enabled) {
1206 ret = chown(ctdb->daemon.name, geteuid(), getegid());
1207 if (ret != 0 && !test_mode_enabled) {
1208 D_ERR("Unable to secure (chown) ctdb socket '%s'\n",
1209 ctdb->daemon.name);
1210 goto failed;
1214 ret = chmod(ctdb->daemon.name, 0700);
1215 if (ret != 0) {
1216 D_ERR("Unable to secure (chmod) ctdb socket '%s'\n",
1217 ctdb->daemon.name);
1218 goto failed;
1222 ret = listen(ctdb->daemon.sd, 100);
1223 if (ret != 0) {
1224 D_ERR("Unable to listen on ctdb socket '%s'\n",
1225 ctdb->daemon.name);
1226 goto failed;
1229 D_NOTICE("Listening to ctdb socket %s\n", ctdb->daemon.name);
1230 return 0;
1232 failed:
1233 close(ctdb->daemon.sd);
1234 ctdb->daemon.sd = -1;
1235 return -1;
1238 struct ctdb_node *ctdb_find_node(struct ctdb_context *ctdb, uint32_t pnn)
1240 struct ctdb_node *node = NULL;
1241 unsigned int i;
1243 if (pnn == CTDB_CURRENT_NODE) {
1244 pnn = ctdb->pnn;
1247 /* Always found: PNN correctly set just before this is called */
1248 for (i = 0; i < ctdb->num_nodes; i++) {
1249 node = ctdb->nodes[i];
1250 if (pnn == node->pnn) {
1251 return node;
1255 return NULL;
1258 static void initialise_node_flags (struct ctdb_context *ctdb)
1260 struct ctdb_node *node = NULL;
1262 node = ctdb_find_node(ctdb, CTDB_CURRENT_NODE);
1264 * PNN correctly set just before this is called so always
1265 * found but keep static analysers happy...
1267 if (node == NULL) {
1268 DBG_ERR("Unable to find current node\n");
1269 return;
1272 node->flags &= ~NODE_FLAGS_DISCONNECTED;
1274 /* do we start out in DISABLED mode? */
1275 if (ctdb->start_as_disabled != 0) {
1276 D_ERR("This node is configured to start in DISABLED state\n");
1277 node->flags |= NODE_FLAGS_PERMANENTLY_DISABLED;
1279 /* do we start out in STOPPED mode? */
1280 if (ctdb->start_as_stopped != 0) {
1281 D_ERR("This node is configured to start in STOPPED state\n");
1282 node->flags |= NODE_FLAGS_STOPPED;
/*
 * Completion callback for the "setup" event script.
 *
 * A failed setup event is fatal.  On success the "setup"
 * notification script is run, the recovery daemon is started and
 * periodic events begin; finally the daemon blocks until the first
 * recovery completes.
 */
static void ctdb_setup_event_callback(struct ctdb_context *ctdb, int status,
				      void *private_data)
{
	if (status != 0) {
		ctdb_die(ctdb, "Failed to run setup event");
	}
	ctdb_run_notification_script(ctdb, "setup");

	/* Start the recovery daemon */
	if (ctdb_start_recoverd(ctdb) != 0) {
		DEBUG(DEBUG_ALERT,("Failed to start recovery daemon\n"));
		exit(11);
	}

	ctdb_start_periodic_events(ctdb);

	ctdb_wait_for_first_recovery(ctdb);
}
1305 static struct timeval tevent_before_wait_ts;
1306 static struct timeval tevent_after_wait_ts;
1308 static void ctdb_tevent_trace_init(void)
1310 struct timeval now;
1312 now = timeval_current();
1314 tevent_before_wait_ts = now;
1315 tevent_after_wait_ts = now;
/*
 * tevent trace callback: log a warning when a single event handler
 * runs for more than 3 seconds, or when no event fires for more than
 * 3 seconds.  Child processes inherit the event context, so only the
 * main daemon process (ctdbd_pid) logs here.
 */
static void ctdb_tevent_trace(enum tevent_trace_point tp,
			      void *private_data)
{
	struct timeval diff;
	struct timeval now;
	struct ctdb_context *ctdb =
		talloc_get_type(private_data, struct ctdb_context);

	if (getpid() != ctdb->ctdbd_pid) {
		return;
	}

	now = timeval_current();

	switch (tp) {
	case TEVENT_TRACE_BEFORE_WAIT:
		/* Time since we last left the wait = handler run time */
		diff = timeval_until(&tevent_after_wait_ts, &now);
		if (diff.tv_sec > 3) {
			DEBUG(DEBUG_ERR,
			      ("Handling event took %ld seconds!\n",
			       (long)diff.tv_sec));
		}
		tevent_before_wait_ts = now;
		break;

	case TEVENT_TRACE_AFTER_WAIT:
		/* Time since we entered the wait = idle time */
		diff = timeval_until(&tevent_before_wait_ts, &now);
		if (diff.tv_sec > 3) {
			DEBUG(DEBUG_ERR,
			      ("No event for %ld seconds!\n",
			       (long)diff.tv_sec));
		}
		tevent_after_wait_ts = now;
		break;

	default:
		/* Do nothing for future tevent trace points */ ;
	}
}
1358 static void ctdb_remove_pidfile(void)
1360 TALLOC_FREE(ctdbd_pidfile_ctx);
1363 static void ctdb_create_pidfile(TALLOC_CTX *mem_ctx)
1365 if (ctdbd_pidfile != NULL) {
1366 int ret = pidfile_context_create(mem_ctx, ctdbd_pidfile,
1367 &ctdbd_pidfile_ctx);
1368 if (ret != 0) {
1369 DEBUG(DEBUG_ERR,
1370 ("Failed to create PID file %s\n",
1371 ctdbd_pidfile));
1372 exit(11);
1375 DEBUG(DEBUG_NOTICE, ("Created PID file %s\n", ctdbd_pidfile));
1376 atexit(ctdb_remove_pidfile);
1380 static void ctdb_initialise_vnn_map(struct ctdb_context *ctdb)
1382 unsigned int i, j, count;
1384 /* initialize the vnn mapping table, skipping any deleted nodes */
1385 ctdb->vnn_map = talloc(ctdb, struct ctdb_vnn_map);
1386 CTDB_NO_MEMORY_FATAL(ctdb, ctdb->vnn_map);
1388 count = 0;
1389 for (i = 0; i < ctdb->num_nodes; i++) {
1390 if ((ctdb->nodes[i]->flags & NODE_FLAGS_DELETED) == 0) {
1391 count++;
1395 ctdb->vnn_map->generation = INVALID_GENERATION;
1396 ctdb->vnn_map->size = count;
1397 ctdb->vnn_map->map = talloc_array(ctdb->vnn_map, uint32_t, ctdb->vnn_map->size);
1398 CTDB_NO_MEMORY_FATAL(ctdb, ctdb->vnn_map->map);
1400 for(i=0, j=0; i < ctdb->vnn_map->size; i++) {
1401 if (ctdb->nodes[i]->flags & NODE_FLAGS_DELETED) {
1402 continue;
1404 ctdb->vnn_map->map[j] = i;
1405 j++;
1409 static void ctdb_set_my_pnn(struct ctdb_context *ctdb)
1411 if (ctdb->address == NULL) {
1412 ctdb_fatal(ctdb,
1413 "Can not determine PNN - node address is not set\n");
1416 ctdb->pnn = ctdb_ip_to_pnn(ctdb, ctdb->address);
1417 if (ctdb->pnn == CTDB_UNKNOWN_PNN) {
1418 ctdb_fatal(ctdb,
1419 "Can not determine PNN - unknown node address\n");
1422 D_NOTICE("PNN is %u\n", ctdb->pnn);
1425 static void stdin_handler(struct tevent_context *ev,
1426 struct tevent_fd *fde,
1427 uint16_t flags,
1428 void *private_data)
1430 struct ctdb_context *ctdb = talloc_get_type_abort(
1431 private_data, struct ctdb_context);
1432 ssize_t nread;
1433 char c;
1435 nread = read(STDIN_FILENO, &c, 1);
1436 if (nread != 1) {
1437 D_ERR("stdin closed, exiting\n");
1438 talloc_free(fde);
1439 ctdb_shutdown_sequence(ctdb, EPIPE);
1443 static int setup_stdin_handler(struct ctdb_context *ctdb)
1445 struct tevent_fd *fde;
1446 struct stat st;
1447 int ret;
1449 ret = fstat(STDIN_FILENO, &st);
1450 if (ret != 0) {
1451 /* Problem with stdin, ignore... */
1452 DBG_INFO("Can't fstat() stdin\n");
1453 return 0;
1456 if (!S_ISFIFO(st.st_mode)) {
1457 DBG_INFO("Not a pipe...\n");
1458 return 0;
1461 fde = tevent_add_fd(ctdb->ev,
1462 ctdb,
1463 STDIN_FILENO,
1464 TEVENT_FD_READ,
1465 stdin_handler,
1466 ctdb);
1467 if (fde == NULL) {
1468 return ENOMEM;
1471 DBG_INFO("Set up stdin handler\n");
1472 return 0;
1475 static void fork_only(void)
1477 pid_t pid;
1479 pid = fork();
1480 if (pid == -1) {
1481 D_ERR("Fork failed (errno=%d)\n", errno);
1482 exit(1);
1485 if (pid != 0) {
1486 /* Parent simply exits... */
1487 exit(0);
1491 static void sighup_hook(void *private_data)
1493 struct ctdb_context *ctdb = talloc_get_type_abort(private_data,
1494 struct ctdb_context);
1496 if (ctdb->recoverd_pid > 0) {
1497 kill(ctdb->recoverd_pid, SIGHUP);
1499 ctdb_event_reopen_logs(ctdb);
/*
  start the protocol going as a daemon

  Performs the full daemon startup sequence: daemonise, bind the
  client socket, set up the event loop and signal handlers, start the
  event daemon, initialise the transport and databases, and finally
  enter the event loop.  Only returns on transport init failure;
  otherwise runs until exit().
 */
int ctdb_start_daemon(struct ctdb_context *ctdb,
		      bool interactive,
		      bool test_mode_enabled)
{
	bool status;
	int ret;
	struct tevent_fd *fde;

	/* Fork if not interactive */
	if (!interactive) {
		if (test_mode_enabled) {
			/* Keep stdin open */
			fork_only();
		} else {
			/* Fork, close stdin, start a session */
			become_daemon(true, false, false);
		}
	}

	ignore_signal(SIGPIPE);
	ignore_signal(SIGUSR1);

	ctdb->ctdbd_pid = getpid();
	DEBUG(DEBUG_ERR, ("Starting CTDBD (Version %s) as PID: %u\n",
			  SAMBA_VERSION_STRING, ctdb->ctdbd_pid));
	ctdb_create_pidfile(ctdb);

	/* create a unix domain stream socket to listen to */
	ret = ux_socket_bind(ctdb, test_mode_enabled);
	if (ret != 0) {
		D_ERR("Cannot continue. Exiting!\n");
		exit(10);
	}

	/* Make sure we log something when the daemon terminates.
	 * This must be the first exit handler to run (so the last to
	 * be registered.
	 */
	__ctdbd_pid = getpid();
	atexit(print_exit_message);

	if (ctdb->do_setsched) {
		/* try to set us up as realtime */
		if (!set_scheduler()) {
			exit(1);
		}
		DEBUG(DEBUG_NOTICE, ("Set real-time scheduler priority\n"));
	}

	ctdb->ev = tevent_context_init(NULL);
	if (ctdb->ev == NULL) {
		DEBUG(DEBUG_ALERT,("tevent_context_init() failed\n"));
		exit(1);
	}
	tevent_loop_allow_nesting(ctdb->ev);
	ctdb_tevent_trace_init();
	/* warn about slow handlers / stalled event loop */
	tevent_set_trace_callback(ctdb->ev, ctdb_tevent_trace, ctdb);

	status = logging_setup_sighup_handler(ctdb->ev,
					      ctdb,
					      sighup_hook,
					      ctdb);
	if (!status) {
		D_ERR("Failed to set up signal handler for SIGHUP\n");
		exit(1);
	}

	/* set up a handler to pick up sigchld */
	if (ctdb_init_sigchld(ctdb) == NULL) {
		DEBUG(DEBUG_CRIT,("Failed to set up signal handler for SIGCHLD\n"));
		exit(1);
	}

	if (!interactive) {
		ctdb_set_child_logging(ctdb);
	}

	/* Exit if stdin is closed */
	if (test_mode_enabled) {
		ret = setup_stdin_handler(ctdb);
		if (ret != 0) {
			DBG_ERR("Failed to setup stdin handler\n");
			exit(1);
		}
	}

	TALLOC_FREE(ctdb->srv);
	if (srvid_init(ctdb, &ctdb->srv) != 0) {
		DEBUG(DEBUG_CRIT,("Failed to setup message srvid context\n"));
		exit(1);
	}

	TALLOC_FREE(ctdb->tunnels);
	if (srvid_init(ctdb, &ctdb->tunnels) != 0) {
		DEBUG(DEBUG_ERR, ("Failed to setup tunnels context\n"));
		exit(1);
	}

	/* initialize statistics collection */
	ctdb_statistics_init(ctdb);

	/* force initial recovery for election */
	ctdb->recovery_mode = CTDB_RECOVERY_ACTIVE;

	if (ctdb_start_eventd(ctdb) != 0) {
		DEBUG(DEBUG_ERR, ("Failed to start event daemon\n"));
		exit(1);
	}

	ctdb_set_runstate(ctdb, CTDB_RUNSTATE_INIT);
	ret = ctdb_event_script(ctdb, CTDB_EVENT_INIT);
	if (ret != 0) {
		ctdb_die(ctdb, "Failed to run init event\n");
	}
	ctdb_run_notification_script(ctdb, "init");

	/* NOTE(review): if ctdb->transport is neither "tcp" nor "ib",
	 * the check below reads the stale ret (0) from the init event
	 * above - presumably the transport name is validated earlier;
	 * confirm. */
	if (strcmp(ctdb->transport, "tcp") == 0) {
		ret = ctdb_tcp_init(ctdb);
	}
#ifdef USE_INFINIBAND
	if (strcmp(ctdb->transport, "ib") == 0) {
		ret = ctdb_ibw_init(ctdb);
	}
#endif
	if (ret != 0) {
		DEBUG(DEBUG_ERR,("Failed to initialise transport '%s'\n", ctdb->transport));
		return -1;
	}

	if (ctdb->methods == NULL) {
		DEBUG(DEBUG_ALERT,(__location__ " Can not initialize transport. ctdb->methods is NULL\n"));
		ctdb_fatal(ctdb, "transport is unavailable. can not initialize.");
	}

	/* Initialise the transport. This sets the node address if it
	 * was not set via the command-line. */
	if (ctdb->methods->initialise(ctdb) != 0) {
		ctdb_fatal(ctdb, "transport failed to initialise");
	}

	ctdb_set_my_pnn(ctdb);

	initialise_node_flags(ctdb);

	ret = ctdb_set_public_addresses(ctdb, true);
	if (ret == -1) {
		D_ERR("Unable to setup public IP addresses\n");
		exit(1);
	}

	ctdb_initialise_vnn_map(ctdb);

	/* attach to existing databases */
	if (ctdb_attach_databases(ctdb) != 0) {
		ctdb_fatal(ctdb, "Failed to attach to databases\n");
	}

	/* start frozen, then let the first election sort things out */
	if (!ctdb_blocking_freeze(ctdb)) {
		ctdb_fatal(ctdb, "Failed to get initial freeze\n");
	}

	/* now start accepting clients, only can do this once frozen */
	fde = tevent_add_fd(ctdb->ev, ctdb, ctdb->daemon.sd, TEVENT_FD_READ,
			    ctdb_accept_client, ctdb);
	if (fde == NULL) {
		ctdb_fatal(ctdb, "Failed to add daemon socket to event loop");
	}
	tevent_fd_set_auto_close(fde);

	/* Start the transport */
	if (ctdb->methods->start(ctdb) != 0) {
		DEBUG(DEBUG_ALERT,("transport failed to start!\n"));
		ctdb_fatal(ctdb, "transport failed to start");
	}

	/* Recovery daemon and timed events are started from the
	 * callback, only after the setup event completes
	 * successfully.
	 */
	ctdb_set_runstate(ctdb, CTDB_RUNSTATE_SETUP);
	ret = ctdb_event_script_callback(ctdb,
					 ctdb,
					 ctdb_setup_event_callback,
					 ctdb,
					 CTDB_EVENT_SETUP,
					 "%s",
					 "");
	if (ret != 0) {
		DEBUG(DEBUG_CRIT,("Failed to set up 'setup' event\n"));
		exit(1);
	}

	lockdown_memory(ctdb->valgrinding);

	/* go into a wait loop to allow other nodes to complete */
	tevent_loop_wait(ctdb->ev);

	DEBUG(DEBUG_CRIT,("event_loop_wait() returned. this should not happen\n"));
	exit(1);
}
1708 allocate a packet for use in daemon<->daemon communication
1710 struct ctdb_req_header *_ctdb_transport_allocate(struct ctdb_context *ctdb,
1711 TALLOC_CTX *mem_ctx,
1712 enum ctdb_operation operation,
1713 size_t length, size_t slength,
1714 const char *type)
1716 int size;
1717 struct ctdb_req_header *hdr;
1719 length = MAX(length, slength);
1720 size = (length+(CTDB_DS_ALIGNMENT-1)) & ~(CTDB_DS_ALIGNMENT-1);
1722 if (ctdb->methods == NULL) {
1723 DEBUG(DEBUG_INFO,(__location__ " Unable to allocate transport packet for operation %u of length %u. Transport is DOWN.\n",
1724 operation, (unsigned)length));
1725 return NULL;
1728 hdr = (struct ctdb_req_header *)ctdb->methods->allocate_pkt(mem_ctx, size);
1729 if (hdr == NULL) {
1730 DEBUG(DEBUG_ERR,("Unable to allocate transport packet for operation %u of length %u\n",
1731 operation, (unsigned)length));
1732 return NULL;
1734 talloc_set_name_const(hdr, type);
1735 memset(hdr, 0, slength);
1736 hdr->length = length;
1737 hdr->operation = operation;
1738 hdr->ctdb_magic = CTDB_MAGIC;
1739 hdr->ctdb_version = CTDB_PROTOCOL;
1740 hdr->generation = ctdb->vnn_map->generation;
1741 hdr->srcnode = ctdb->pnn;
1743 return hdr;
/*
 * Tracks one outstanding client-issued control.  Linked into the
 * destination node's pending_controls list so the control can be
 * failed if that node disconnects (see ctdb_daemon_cancel_controls()).
 */
struct daemon_control_state {
	struct daemon_control_state *next, *prev;
	struct ctdb_client *client;
	struct ctdb_req_control_old *c;
	uint32_t reqid;		/* client's request id, echoed in the reply */
	struct ctdb_node *node;	/* destination node, or NULL for invalid pnn */
};
/*
  callback when a control reply comes in

  Builds a CTDB_REPLY_CONTROL packet (payload followed by an optional
  error string) and queues it to the originating client.
 */
static void daemon_control_callback(struct ctdb_context *ctdb,
				    int32_t status, TDB_DATA data,
				    const char *errormsg,
				    void *private_data)
{
	struct daemon_control_state *state = talloc_get_type(private_data,
							     struct daemon_control_state);
	struct ctdb_client *client = state->client;
	struct ctdb_reply_control_old *r;
	size_t len;
	int ret;

	/* construct a message to send to the client containing the data */
	len = offsetof(struct ctdb_reply_control_old, data) + data.dsize;
	if (errormsg) {
		len += strlen(errormsg);
	}
	/* r is allocated on state, so freeing state releases r too */
	r = ctdbd_allocate_pkt(ctdb, state, CTDB_REPLY_CONTROL, len,
			       struct ctdb_reply_control_old);
	CTDB_NO_MEMORY_VOID(ctdb, r);

	r->hdr.reqid = state->reqid;
	r->status = status;
	r->datalen = data.dsize;
	r->errorlen = 0;
	memcpy(&r->data[0], data.dptr, data.dsize);
	if (errormsg) {
		/* error text is appended directly after the data payload */
		r->errorlen = strlen(errormsg);
		memcpy(&r->data[r->datalen], errormsg, r->errorlen);
	}

	ret = daemon_queue_send(client, &r->hdr);
	if (ret != -1) {
		/* reply queued: drop the pending-control state.
		 * NOTE(review): on send failure state is kept alive -
		 * presumably it is released with the client context;
		 * confirm. */
		talloc_free(state);
	}
}
1795 fail all pending controls to a disconnected node
1797 void ctdb_daemon_cancel_controls(struct ctdb_context *ctdb, struct ctdb_node *node)
1799 struct daemon_control_state *state;
1800 while ((state = node->pending_controls)) {
1801 DLIST_REMOVE(node->pending_controls, state);
1802 daemon_control_callback(ctdb, (uint32_t)-1, tdb_null,
1803 "node is disconnected", state);
1808 destroy a daemon_control_state
1810 static int daemon_control_destructor(struct daemon_control_state *state)
1812 if (state->node) {
1813 DLIST_REMOVE(state->node->pending_controls, state);
1815 return 0;
/*
  this is called when the ctdb daemon received a ctdb request control
  from a local client over the unix domain socket
 */
static void daemon_request_control_from_client(struct ctdb_client *client,
					       struct ctdb_req_control_old *c)
{
	TDB_DATA data;
	int res;
	struct daemon_control_state *state;
	TALLOC_CTX *tmp_ctx = talloc_new(client);

	/* resolve CTDB_CURRENT_NODE to our own pnn */
	if (c->hdr.destnode == CTDB_CURRENT_NODE) {
		c->hdr.destnode = client->ctdb->pnn;
	}

	state = talloc(client, struct daemon_control_state);
	CTDB_NO_MEMORY_VOID(client->ctdb, state);

	state->client = client;
	/* the request packet now lives (and dies) with state */
	state->c = talloc_steal(state, c);
	state->reqid = c->hdr.reqid;
	if (ctdb_validate_pnn(client->ctdb, c->hdr.destnode)) {
		/* track it on the destination node so it can be failed
		 * if the node disconnects */
		state->node = client->ctdb->nodes[c->hdr.destnode];
		DLIST_ADD(state->node->pending_controls, state);
	} else {
		state->node = NULL;
	}

	talloc_set_destructor(state, daemon_control_destructor);

	if (c->flags & CTDB_CTRL_FLAG_NOREPLY) {
		/* no reply will ever arrive to free state, so hang it
		 * off tmp_ctx which is freed at the end of this call */
		talloc_steal(tmp_ctx, state);
	}

	data.dptr = &c->data[0];
	data.dsize = c->datalen;
	res = ctdb_daemon_send_control(client->ctdb, c->hdr.destnode,
				       c->srvid, c->opcode, client->client_id,
				       c->flags,
				       data, daemon_control_callback,
				       state);
	if (res != 0) {
		DEBUG(DEBUG_ERR,(__location__ " Failed to send control to remote node %u\n",
			 c->hdr.destnode));
	}

	talloc_free(tmp_ctx);
}
1868 static void daemon_request_tunnel_from_client(struct ctdb_client *client,
1869 struct ctdb_req_tunnel_old *c)
1871 TDB_DATA data;
1872 int ret;
1874 if (! ctdb_validate_pnn(client->ctdb, c->hdr.destnode)) {
1875 DEBUG(DEBUG_ERR, ("Invalid destination 0x%x\n",
1876 c->hdr.destnode));
1877 return;
1880 ret = srvid_exists(client->ctdb->tunnels, c->tunnel_id, NULL);
1881 if (ret != 0) {
1882 DEBUG(DEBUG_ERR,
1883 ("tunnel id 0x%"PRIx64" not registered, dropping pkt\n",
1884 c->tunnel_id));
1885 return;
1888 data = (TDB_DATA) {
1889 .dsize = c->datalen,
1890 .dptr = &c->data[0],
1893 ret = ctdb_daemon_send_tunnel(client->ctdb, c->hdr.destnode,
1894 c->tunnel_id, c->flags, data);
1895 if (ret != 0) {
1896 DEBUG(DEBUG_ERR, ("Failed to set tunnel to remote note %u\n",
1897 c->hdr.destnode));
1902 register a call function
1904 int ctdb_daemon_set_call(struct ctdb_context *ctdb, uint32_t db_id,
1905 ctdb_fn_t fn, int id)
1907 struct ctdb_registered_call *call;
1908 struct ctdb_db_context *ctdb_db;
1910 ctdb_db = find_ctdb_db(ctdb, db_id);
1911 if (ctdb_db == NULL) {
1912 return -1;
1915 call = talloc(ctdb_db, struct ctdb_registered_call);
1916 call->fn = fn;
1917 call->id = id;
1919 DLIST_ADD(ctdb_db->calls, call);
1920 return 0;
/*
  this local messaging handler is ugly, but is needed to prevent
  recursion in ctdb_send_message() when the destination node is the
  same as the source node
 */
struct ctdb_local_message {
	struct ctdb_context *ctdb;
	uint64_t srvid;
	TDB_DATA data;	/* private talloc copy of the payload */
};
1936 static void ctdb_local_message_trigger(struct tevent_context *ev,
1937 struct tevent_timer *te,
1938 struct timeval t, void *private_data)
1940 struct ctdb_local_message *m = talloc_get_type(
1941 private_data, struct ctdb_local_message);
1943 srvid_dispatch(m->ctdb->srv, m->srvid, CTDB_SRVID_ALL, m->data);
1944 talloc_free(m);
1947 static int ctdb_local_message(struct ctdb_context *ctdb, uint64_t srvid, TDB_DATA data)
1949 struct ctdb_local_message *m;
1950 m = talloc(ctdb, struct ctdb_local_message);
1951 CTDB_NO_MEMORY(ctdb, m);
1953 m->ctdb = ctdb;
1954 m->srvid = srvid;
1955 m->data = data;
1956 m->data.dptr = talloc_memdup(m, m->data.dptr, m->data.dsize);
1957 if (m->data.dptr == NULL) {
1958 talloc_free(m);
1959 return -1;
1962 /* this needs to be done as an event to prevent recursion */
1963 tevent_add_timer(ctdb->ev, m, timeval_zero(),
1964 ctdb_local_message_trigger, m);
1965 return 0;
/*
  send a ctdb message

  Delivers to ourselves via the local loop-back path when pnn is our
  own node, otherwise builds a CTDB_REQ_MESSAGE packet and queues it
  to the destination.  Returns 0 on success, -1 on failure.
 */
int ctdb_daemon_send_message(struct ctdb_context *ctdb, uint32_t pnn,
			     uint64_t srvid, TDB_DATA data)
{
	struct ctdb_req_message_old *r;
	int len;

	if (ctdb->methods == NULL) {
		DEBUG(DEBUG_INFO,(__location__ " Failed to send message. Transport is DOWN\n"));
		return -1;
	}

	/* see if this is a message to ourselves */
	if (pnn == ctdb->pnn) {
		return ctdb_local_message(ctdb, srvid, data);
	}

	len = offsetof(struct ctdb_req_message_old, data) + data.dsize;
	r = ctdb_transport_allocate(ctdb, ctdb, CTDB_REQ_MESSAGE, len,
				    struct ctdb_req_message_old);
	CTDB_NO_MEMORY(ctdb, r);

	r->hdr.destnode = pnn;
	r->srvid = srvid;
	r->datalen = data.dsize;
	memcpy(&r->data[0], data.dptr, data.dsize);

	/* NOTE(review): r is freed immediately after queueing, so
	 * ctdb_queue_packet() presumably copies the packet - confirm */
	ctdb_queue_packet(ctdb, &r->hdr);

	talloc_free(r);
	return 0;
}
/*
 * Per-client death-notification registration: when the owning client
 * disconnects, "data" is broadcast on "srvid" by the talloc
 * destructor attached to this structure.
 */
struct ctdb_client_notify_list {
	struct ctdb_client_notify_list *next, *prev;
	struct ctdb_context *ctdb;
	uint64_t srvid;		/* srvid the notification is sent on */
	TDB_DATA data;		/* payload supplied at registration time */
};
/*
 * Talloc destructor: when a client that registered for death
 * notification goes away, broadcast its registered message to all
 * connected nodes.
 */
static int ctdb_client_notify_destructor(struct ctdb_client_notify_list *nl)
{
	int ret;

	DEBUG(DEBUG_ERR,("Sending client notify message for srvid:%llu\n", (unsigned long long)nl->srvid));

	ret = ctdb_daemon_send_message(nl->ctdb, CTDB_BROADCAST_CONNECTED, (unsigned long long)nl->srvid, nl->data);
	if (ret != 0) {
		/* best-effort: nothing more can be done in a destructor */
		DEBUG(DEBUG_ERR,("Failed to send client notify message\n"));
	}

	return 0;
}
2027 int32_t ctdb_control_register_notify(struct ctdb_context *ctdb, uint32_t client_id, TDB_DATA indata)
2029 struct ctdb_notify_data_old *notify = (struct ctdb_notify_data_old *)indata.dptr;
2030 struct ctdb_client *client = reqid_find(ctdb->idr, client_id, struct ctdb_client);
2031 struct ctdb_client_notify_list *nl;
2033 DEBUG(DEBUG_INFO,("Register srvid %llu for client %d\n", (unsigned long long)notify->srvid, client_id));
2035 if (indata.dsize < offsetof(struct ctdb_notify_data_old, notify_data)) {
2036 DEBUG(DEBUG_ERR,(__location__ " Too little data in control : %d\n", (int)indata.dsize));
2037 return -1;
2040 if (indata.dsize != (notify->len + offsetof(struct ctdb_notify_data_old, notify_data))) {
2041 DEBUG(DEBUG_ERR,(__location__ " Wrong amount of data in control. Got %d, expected %d\n", (int)indata.dsize, (int)(notify->len + offsetof(struct ctdb_notify_data_old, notify_data))));
2042 return -1;
2046 if (client == NULL) {
2047 DEBUG(DEBUG_ERR,(__location__ " Could not find client parent structure. You can not send this control to a remote node\n"));
2048 return -1;
2051 for(nl=client->notify; nl; nl=nl->next) {
2052 if (nl->srvid == notify->srvid) {
2053 break;
2056 if (nl != NULL) {
2057 DEBUG(DEBUG_ERR,(__location__ " Notification for srvid:%llu already exists for this client\n", (unsigned long long)notify->srvid));
2058 return -1;
2061 nl = talloc(client, struct ctdb_client_notify_list);
2062 CTDB_NO_MEMORY(ctdb, nl);
2063 nl->ctdb = ctdb;
2064 nl->srvid = notify->srvid;
2065 nl->data.dsize = notify->len;
2066 nl->data.dptr = talloc_memdup(nl, notify->notify_data,
2067 nl->data.dsize);
2068 CTDB_NO_MEMORY(ctdb, nl->data.dptr);
2070 DLIST_ADD(client->notify, nl);
2071 talloc_set_destructor(nl, ctdb_client_notify_destructor);
2073 return 0;
2076 int32_t ctdb_control_deregister_notify(struct ctdb_context *ctdb, uint32_t client_id, TDB_DATA indata)
2078 uint64_t srvid = *(uint64_t *)indata.dptr;
2079 struct ctdb_client *client = reqid_find(ctdb->idr, client_id, struct ctdb_client);
2080 struct ctdb_client_notify_list *nl;
2082 DEBUG(DEBUG_INFO,("Deregister srvid %llu for client %d\n", (unsigned long long)srvid, client_id));
2084 if (client == NULL) {
2085 DEBUG(DEBUG_ERR,(__location__ " Could not find client parent structure. You can not send this control to a remote node\n"));
2086 return -1;
2089 for(nl=client->notify; nl; nl=nl->next) {
2090 if (nl->srvid == srvid) {
2091 break;
2094 if (nl == NULL) {
2095 DEBUG(DEBUG_ERR,(__location__ " No notification for srvid:%llu found for this client\n", (unsigned long long)srvid));
2096 return -1;
2099 DLIST_REMOVE(client->notify, nl);
2100 talloc_set_destructor(nl, NULL);
2101 talloc_free(nl);
2103 return 0;
2106 struct ctdb_client *ctdb_find_client_by_pid(struct ctdb_context *ctdb, pid_t pid)
2108 struct ctdb_client_pid_list *client_pid;
2110 for (client_pid = ctdb->client_pids; client_pid; client_pid=client_pid->next) {
2111 if (client_pid->pid == pid) {
2112 return client_pid->client;
2115 return NULL;
/* This control is used by samba when probing if a process (of a samba daemon)
   exists on the node.
   Samba does this when it needs/wants to check if a subrecord in one of the
   databases is still valid, or if it is stale and can be removed.
   If the node is in unhealthy or stopped state we just kill of the samba
   process holding this sub-record and return to the calling samba that
   the process does not exist.
   This allows us to forcefully recall subrecords registered by samba processes
   on banned and stopped nodes.
*/
int32_t ctdb_control_process_exists(struct ctdb_context *ctdb, pid_t pid)
{
	struct ctdb_client *client;

	client = ctdb_find_client_by_pid(ctdb, pid);
	if (client == NULL) {
		/* pid is not a connected ctdb client - report "gone" */
		return -1;
	}

	if (ctdb->nodes[ctdb->pnn]->flags & NODE_FLAGS_INACTIVE) {
		DEBUG(DEBUG_NOTICE,
		      ("Killing client with pid:%d on banned/stopped node\n",
		       (int)pid));
		/* NOTE(review): freeing the client runs
		 * ctdb_client_destructor(), presumably closing its
		 * connection and releasing its records - confirm */
		talloc_free(client);
		return -1;
	}

	/* kill(pid, 0): existence probe only, no signal is delivered */
	return kill(pid, 0);
}
2149 int32_t ctdb_control_check_pid_srvid(struct ctdb_context *ctdb,
2150 TDB_DATA indata)
2152 struct ctdb_client_pid_list *client_pid;
2153 pid_t pid;
2154 uint64_t srvid;
2155 int ret;
2157 pid = *(pid_t *)indata.dptr;
2158 srvid = *(uint64_t *)(indata.dptr + sizeof(pid_t));
2160 for (client_pid = ctdb->client_pids;
2161 client_pid != NULL;
2162 client_pid = client_pid->next) {
2163 if (client_pid->pid == pid) {
2164 ret = srvid_exists(ctdb->srv, srvid,
2165 client_pid->client);
2166 if (ret == 0) {
2167 return 0;
2172 return -1;
2175 int ctdb_control_getnodesfile(struct ctdb_context *ctdb, uint32_t opcode, TDB_DATA indata, TDB_DATA *outdata)
2177 struct ctdb_node_map_old *node_map = NULL;
2179 CHECK_CONTROL_DATA_SIZE(0);
2181 node_map = ctdb_read_nodes_file(ctdb, ctdb->nodes_file);
2182 if (node_map == NULL) {
2183 DEBUG(DEBUG_ERR, ("Failed to read nodes file\n"));
2184 return -1;
2187 outdata->dptr = (unsigned char *)node_map;
2188 outdata->dsize = talloc_get_size(outdata->dptr);
2190 return 0;
/*
 * Orderly daemon shutdown: stop subsystems, run the "shutdown" event
 * and exit the process with exit_code.  Never returns.  Re-entry
 * (e.g. from an exit handler firing during shutdown) is a no-op.
 */
void ctdb_shutdown_sequence(struct ctdb_context *ctdb, int exit_code)
{
	if (ctdb->runstate == CTDB_RUNSTATE_SHUTDOWN) {
		DEBUG(DEBUG_NOTICE,("Already shutting down so will not proceed.\n"));
		return;
	}

	DEBUG(DEBUG_ERR,("Shutdown sequence commencing.\n"));
	ctdb_set_runstate(ctdb, CTDB_RUNSTATE_SHUTDOWN);
	ctdb_stop_recoverd(ctdb);
	ctdb_stop_keepalive(ctdb);
	ctdb_stop_monitoring(ctdb);
	/* run the shutdown event before stopping the event daemon */
	ctdb_event_script(ctdb, CTDB_EVENT_SHUTDOWN);
	ctdb_stop_eventd(ctdb);
	if (ctdb->methods != NULL && ctdb->methods->shutdown != NULL) {
		ctdb->methods->shutdown(ctdb);
	}

	DEBUG(DEBUG_ERR,("Shutdown sequence complete, exiting.\n"));
	exit(exit_code);
}
/* When forking the main daemon and the child process needs to connect
 * back to the daemon as a client process, this function can be used
 * to change the ctdb context from daemon into client mode. The child
 * process must be created using ctdb_fork() and not fork() -
 * ctdb_fork() does some necessary housekeeping.
 *
 * Returns 0 on success, -1 when the client connection fails; exits
 * on event-context allocation failure.
 */
int switch_from_server_to_client(struct ctdb_context *ctdb)
{
	int ret;

	/* the listening socket belongs to the parent daemon only */
	if (ctdb->daemon.sd != -1) {
		close(ctdb->daemon.sd);
		ctdb->daemon.sd = -1;
	}

	/* get a new event context */
	ctdb->ev = tevent_context_init(ctdb);
	if (ctdb->ev == NULL) {
		DEBUG(DEBUG_ALERT,("tevent_context_init() failed\n"));
		exit(1);
	}
	tevent_loop_allow_nesting(ctdb->ev);

	/* Connect to main CTDB daemon */
	ret = ctdb_socket_connect(ctdb);
	if (ret != 0) {
		DEBUG(DEBUG_ALERT, (__location__ " Failed to init ctdb client\n"));
		return -1;
	}

	/* as a client we may now send controls to the daemon */
	ctdb->can_send_controls = true;

	return 0;
}