Convert all uint32/16/8 to _t in source3/locking.
[Samba.git] / ctdb / client / ctdb_client.c
blob5bf3a68855eedabe0ca87c5be5c687f3dd72a67f
1 /*
2 ctdb daemon code
4 Copyright (C) Andrew Tridgell 2007
5 Copyright (C) Ronnie Sahlberg 2007
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, see <http://www.gnu.org/licenses/>.
21 #include "includes.h"
22 #include "lib/tdb_wrap/tdb_wrap.h"
23 #include "tdb.h"
24 #include "lib/util/dlinklist.h"
25 #include "system/network.h"
26 #include "system/filesys.h"
27 #include "system/locale.h"
28 #include <stdlib.h>
29 #include "../include/ctdb_private.h"
30 #include "lib/util/dlinklist.h"
33 allocate a packet for use in client<->daemon communication
35 struct ctdb_req_header *_ctdbd_allocate_pkt(struct ctdb_context *ctdb,
36 TALLOC_CTX *mem_ctx,
37 enum ctdb_operation operation,
38 size_t length, size_t slength,
39 const char *type)
41 int size;
42 struct ctdb_req_header *hdr;
44 length = MAX(length, slength);
45 size = (length+(CTDB_DS_ALIGNMENT-1)) & ~(CTDB_DS_ALIGNMENT-1);
47 hdr = (struct ctdb_req_header *)talloc_zero_size(mem_ctx, size);
48 if (hdr == NULL) {
49 DEBUG(DEBUG_ERR,("Unable to allocate packet for operation %u of length %u\n",
50 operation, (unsigned)length));
51 return NULL;
53 talloc_set_name_const(hdr, type);
54 hdr->length = length;
55 hdr->operation = operation;
56 hdr->ctdb_magic = CTDB_MAGIC;
57 hdr->ctdb_version = CTDB_PROTOCOL;
58 hdr->srcnode = ctdb->pnn;
59 if (ctdb->vnn_map) {
60 hdr->generation = ctdb->vnn_map->generation;
63 return hdr;
67 local version of ctdb_call
69 int ctdb_call_local(struct ctdb_db_context *ctdb_db, struct ctdb_call *call,
70 struct ctdb_ltdb_header *header, TALLOC_CTX *mem_ctx,
71 TDB_DATA *data, bool updatetdb)
73 struct ctdb_call_info *c;
74 struct ctdb_registered_call *fn;
75 struct ctdb_context *ctdb = ctdb_db->ctdb;
77 c = talloc(ctdb, struct ctdb_call_info);
78 CTDB_NO_MEMORY(ctdb, c);
80 c->key = call->key;
81 c->call_data = &call->call_data;
82 c->record_data.dptr = talloc_memdup(c, data->dptr, data->dsize);
83 c->record_data.dsize = data->dsize;
84 CTDB_NO_MEMORY(ctdb, c->record_data.dptr);
85 c->new_data = NULL;
86 c->reply_data = NULL;
87 c->status = 0;
88 c->header = header;
90 for (fn=ctdb_db->calls;fn;fn=fn->next) {
91 if (fn->id == call->call_id) break;
93 if (fn == NULL) {
94 ctdb_set_error(ctdb, "Unknown call id %u\n", call->call_id);
95 talloc_free(c);
96 return -1;
99 if (fn->fn(c) != 0) {
100 ctdb_set_error(ctdb, "ctdb_call %u failed\n", call->call_id);
101 talloc_free(c);
102 return -1;
105 /* we need to force the record to be written out if this was a remote access */
106 if (c->new_data == NULL) {
107 c->new_data = &c->record_data;
110 if (c->new_data && updatetdb) {
111 /* XXX check that we always have the lock here? */
112 if (ctdb_ltdb_store(ctdb_db, call->key, header, *c->new_data) != 0) {
113 ctdb_set_error(ctdb, "ctdb_call tdb_store failed\n");
114 talloc_free(c);
115 return -1;
119 if (c->reply_data) {
120 call->reply_data = *c->reply_data;
122 talloc_steal(call, call->reply_data.dptr);
123 talloc_set_name_const(call->reply_data.dptr, __location__);
124 } else {
125 call->reply_data.dptr = NULL;
126 call->reply_data.dsize = 0;
128 call->status = c->status;
130 talloc_free(c);
132 return 0;
137 queue a packet for sending from client to daemon
139 static int ctdb_client_queue_pkt(struct ctdb_context *ctdb, struct ctdb_req_header *hdr)
141 return ctdb_queue_send(ctdb->daemon.queue, (uint8_t *)hdr, hdr->length);
146 called when a CTDB_REPLY_CALL packet comes in in the client
148 This packet comes in response to a CTDB_REQ_CALL request packet. It
149 contains any reply data from the call
151 static void ctdb_client_reply_call(struct ctdb_context *ctdb, struct ctdb_req_header *hdr)
153 struct ctdb_reply_call *c = (struct ctdb_reply_call *)hdr;
154 struct ctdb_client_call_state *state;
156 state = ctdb_reqid_find(ctdb, hdr->reqid, struct ctdb_client_call_state);
157 if (state == NULL) {
158 DEBUG(DEBUG_ERR,(__location__ " reqid %u not found\n", hdr->reqid));
159 return;
162 if (hdr->reqid != state->reqid) {
163 /* we found a record but it was the wrong one */
164 DEBUG(DEBUG_ERR, ("Dropped client call reply with reqid:%u\n",hdr->reqid));
165 return;
168 state->call->reply_data.dptr = c->data;
169 state->call->reply_data.dsize = c->datalen;
170 state->call->status = c->status;
172 talloc_steal(state, c);
174 state->state = CTDB_CALL_DONE;
176 if (state->async.fn) {
177 state->async.fn(state);
181 static void ctdb_client_reply_control(struct ctdb_context *ctdb, struct ctdb_req_header *hdr);
184 this is called in the client, when data comes in from the daemon
186 void ctdb_client_read_cb(uint8_t *data, size_t cnt, void *args)
188 struct ctdb_context *ctdb = talloc_get_type(args, struct ctdb_context);
189 struct ctdb_req_header *hdr = (struct ctdb_req_header *)data;
190 TALLOC_CTX *tmp_ctx;
192 /* place the packet as a child of a tmp_ctx. We then use
193 talloc_free() below to free it. If any of the calls want
194 to keep it, then they will steal it somewhere else, and the
195 talloc_free() will be a no-op */
196 tmp_ctx = talloc_new(ctdb);
197 talloc_steal(tmp_ctx, hdr);
199 if (cnt == 0) {
200 DEBUG(DEBUG_CRIT,("Daemon has exited - shutting down client\n"));
201 exit(1);
204 if (cnt < sizeof(*hdr)) {
205 DEBUG(DEBUG_CRIT,("Bad packet length %u in client\n", (unsigned)cnt));
206 goto done;
208 if (cnt != hdr->length) {
209 ctdb_set_error(ctdb, "Bad header length %u expected %u in client\n",
210 (unsigned)hdr->length, (unsigned)cnt);
211 goto done;
214 if (hdr->ctdb_magic != CTDB_MAGIC) {
215 ctdb_set_error(ctdb, "Non CTDB packet rejected in client\n");
216 goto done;
219 if (hdr->ctdb_version != CTDB_PROTOCOL) {
220 ctdb_set_error(ctdb, "Bad CTDB version 0x%x rejected in client\n", hdr->ctdb_version);
221 goto done;
224 switch (hdr->operation) {
225 case CTDB_REPLY_CALL:
226 ctdb_client_reply_call(ctdb, hdr);
227 break;
229 case CTDB_REQ_MESSAGE:
230 ctdb_request_message(ctdb, hdr);
231 break;
233 case CTDB_REPLY_CONTROL:
234 ctdb_client_reply_control(ctdb, hdr);
235 break;
237 default:
238 DEBUG(DEBUG_CRIT,("bogus operation code:%u\n",hdr->operation));
241 done:
242 talloc_free(tmp_ctx);
246 connect to a unix domain socket
248 int ctdb_socket_connect(struct ctdb_context *ctdb)
250 struct sockaddr_un addr;
252 memset(&addr, 0, sizeof(addr));
253 addr.sun_family = AF_UNIX;
254 strncpy(addr.sun_path, ctdb->daemon.name, sizeof(addr.sun_path)-1);
256 ctdb->daemon.sd = socket(AF_UNIX, SOCK_STREAM, 0);
257 if (ctdb->daemon.sd == -1) {
258 DEBUG(DEBUG_ERR,(__location__ " Failed to open client socket. Errno:%s(%d)\n", strerror(errno), errno));
259 return -1;
262 if (connect(ctdb->daemon.sd, (struct sockaddr *)&addr, sizeof(addr)) == -1) {
263 close(ctdb->daemon.sd);
264 ctdb->daemon.sd = -1;
265 DEBUG(DEBUG_ERR,(__location__ " Failed to connect client socket to daemon. Errno:%s(%d)\n", strerror(errno), errno));
266 return -1;
269 set_nonblocking(ctdb->daemon.sd);
270 set_close_on_exec(ctdb->daemon.sd);
272 ctdb->daemon.queue = ctdb_queue_setup(ctdb, ctdb, ctdb->daemon.sd,
273 CTDB_DS_ALIGNMENT,
274 ctdb_client_read_cb, ctdb, "to-ctdbd");
275 return 0;
279 struct ctdb_record_handle {
280 struct ctdb_db_context *ctdb_db;
281 TDB_DATA key;
282 TDB_DATA *data;
283 struct ctdb_ltdb_header header;
288 make a recv call to the local ctdb daemon - called from client context
290 This is called when the program wants to wait for a ctdb_call to complete and get the
291 results. This call will block unless the call has already completed.
293 int ctdb_call_recv(struct ctdb_client_call_state *state, struct ctdb_call *call)
295 if (state == NULL) {
296 return -1;
299 while (state->state < CTDB_CALL_DONE) {
300 event_loop_once(state->ctdb_db->ctdb->ev);
302 if (state->state != CTDB_CALL_DONE) {
303 DEBUG(DEBUG_ERR,(__location__ " ctdb_call_recv failed\n"));
304 talloc_free(state);
305 return -1;
308 if (state->call->reply_data.dsize) {
309 call->reply_data.dptr = talloc_memdup(state->ctdb_db,
310 state->call->reply_data.dptr,
311 state->call->reply_data.dsize);
312 call->reply_data.dsize = state->call->reply_data.dsize;
313 } else {
314 call->reply_data.dptr = NULL;
315 call->reply_data.dsize = 0;
317 call->status = state->call->status;
318 talloc_free(state);
320 return call->status;
327 destroy a ctdb_call in client
329 static int ctdb_client_call_destructor(struct ctdb_client_call_state *state)
331 ctdb_reqid_remove(state->ctdb_db->ctdb, state->reqid);
332 return 0;
336 construct an event driven local ctdb_call
338 this is used so that locally processed ctdb_call requests are processed
339 in an event driven manner
341 static struct ctdb_client_call_state *ctdb_client_call_local_send(struct ctdb_db_context *ctdb_db,
342 struct ctdb_call *call,
343 struct ctdb_ltdb_header *header,
344 TDB_DATA *data)
346 struct ctdb_client_call_state *state;
347 struct ctdb_context *ctdb = ctdb_db->ctdb;
348 int ret;
350 state = talloc_zero(ctdb_db, struct ctdb_client_call_state);
351 CTDB_NO_MEMORY_NULL(ctdb, state);
352 state->call = talloc_zero(state, struct ctdb_call);
353 CTDB_NO_MEMORY_NULL(ctdb, state->call);
355 talloc_steal(state, data->dptr);
357 state->state = CTDB_CALL_DONE;
358 *(state->call) = *call;
359 state->ctdb_db = ctdb_db;
361 ret = ctdb_call_local(ctdb_db, state->call, header, state, data, true);
362 if (ret != 0) {
363 DEBUG(DEBUG_DEBUG,("ctdb_call_local() failed, ignoring return code %d\n", ret));
366 return state;
370 make a ctdb call to the local daemon - async send. Called from client context.
372 This constructs a ctdb_call request and queues it for processing.
373 This call never blocks.
375 struct ctdb_client_call_state *ctdb_call_send(struct ctdb_db_context *ctdb_db,
376 struct ctdb_call *call)
378 struct ctdb_client_call_state *state;
379 struct ctdb_context *ctdb = ctdb_db->ctdb;
380 struct ctdb_ltdb_header header;
381 TDB_DATA data;
382 int ret;
383 size_t len;
384 struct ctdb_req_call *c;
386 /* if the domain socket is not yet open, open it */
387 if (ctdb->daemon.sd==-1) {
388 ctdb_socket_connect(ctdb);
391 ret = ctdb_ltdb_lock(ctdb_db, call->key);
392 if (ret != 0) {
393 DEBUG(DEBUG_ERR,(__location__ " Failed to get chainlock\n"));
394 return NULL;
397 ret = ctdb_ltdb_fetch(ctdb_db, call->key, &header, ctdb_db, &data);
399 if ((call->flags & CTDB_IMMEDIATE_MIGRATION) && (header.flags & CTDB_REC_RO_HAVE_DELEGATIONS)) {
400 ret = -1;
403 if (ret == 0 && header.dmaster == ctdb->pnn) {
404 state = ctdb_client_call_local_send(ctdb_db, call, &header, &data);
405 talloc_free(data.dptr);
406 ctdb_ltdb_unlock(ctdb_db, call->key);
407 return state;
410 ctdb_ltdb_unlock(ctdb_db, call->key);
411 talloc_free(data.dptr);
413 state = talloc_zero(ctdb_db, struct ctdb_client_call_state);
414 if (state == NULL) {
415 DEBUG(DEBUG_ERR, (__location__ " failed to allocate state\n"));
416 return NULL;
418 state->call = talloc_zero(state, struct ctdb_call);
419 if (state->call == NULL) {
420 DEBUG(DEBUG_ERR, (__location__ " failed to allocate state->call\n"));
421 return NULL;
424 len = offsetof(struct ctdb_req_call, data) + call->key.dsize + call->call_data.dsize;
425 c = ctdbd_allocate_pkt(ctdb, state, CTDB_REQ_CALL, len, struct ctdb_req_call);
426 if (c == NULL) {
427 DEBUG(DEBUG_ERR, (__location__ " failed to allocate packet\n"));
428 return NULL;
431 state->reqid = ctdb_reqid_new(ctdb, state);
432 state->ctdb_db = ctdb_db;
433 talloc_set_destructor(state, ctdb_client_call_destructor);
435 c->hdr.reqid = state->reqid;
436 c->flags = call->flags;
437 c->db_id = ctdb_db->db_id;
438 c->callid = call->call_id;
439 c->hopcount = 0;
440 c->keylen = call->key.dsize;
441 c->calldatalen = call->call_data.dsize;
442 memcpy(&c->data[0], call->key.dptr, call->key.dsize);
443 memcpy(&c->data[call->key.dsize],
444 call->call_data.dptr, call->call_data.dsize);
445 *(state->call) = *call;
446 state->call->call_data.dptr = &c->data[call->key.dsize];
447 state->call->key.dptr = &c->data[0];
449 state->state = CTDB_CALL_WAIT;
452 ctdb_client_queue_pkt(ctdb, &c->hdr);
454 return state;
/*
  full ctdb_call. Equivalent to a ctdb_call_send() followed by a ctdb_call_recv()

  Returns whatever ctdb_call_recv() returns for the sent request.
 */
int ctdb_call(struct ctdb_db_context *ctdb_db, struct ctdb_call *call)
{
	struct ctdb_client_call_state *pending = ctdb_call_send(ctdb_db, call);

	return ctdb_call_recv(pending, call);
}
471 tell the daemon what messaging srvid we will use, and register the message
472 handler function in the client
474 int ctdb_client_set_message_handler(struct ctdb_context *ctdb, uint64_t srvid,
475 ctdb_msg_fn_t handler,
476 void *private_data)
478 int res;
479 int32_t status;
481 res = ctdb_control(ctdb, CTDB_CURRENT_NODE, srvid, CTDB_CONTROL_REGISTER_SRVID, 0,
482 tdb_null, NULL, NULL, &status, NULL, NULL);
483 if (res != 0 || status != 0) {
484 DEBUG(DEBUG_ERR,("Failed to register srvid %llu\n", (unsigned long long)srvid));
485 return -1;
488 /* also need to register the handler with our own ctdb structure */
489 return ctdb_register_message_handler(ctdb, ctdb, srvid, handler, private_data);
493 tell the daemon we no longer want a srvid
495 int ctdb_client_remove_message_handler(struct ctdb_context *ctdb, uint64_t srvid, void *private_data)
497 int res;
498 int32_t status;
500 res = ctdb_control(ctdb, CTDB_CURRENT_NODE, srvid, CTDB_CONTROL_DEREGISTER_SRVID, 0,
501 tdb_null, NULL, NULL, &status, NULL, NULL);
502 if (res != 0 || status != 0) {
503 DEBUG(DEBUG_ERR,("Failed to deregister srvid %llu\n", (unsigned long long)srvid));
504 return -1;
507 /* also need to register the handler with our own ctdb structure */
508 ctdb_deregister_message_handler(ctdb, srvid, private_data);
509 return 0;
513 * check server ids
515 int ctdb_client_check_message_handlers(struct ctdb_context *ctdb, uint64_t *ids, uint32_t num,
516 uint8_t *result)
518 TDB_DATA indata, outdata;
519 int res;
520 int32_t status;
521 int i;
523 indata.dptr = (uint8_t *)ids;
524 indata.dsize = num * sizeof(*ids);
526 res = ctdb_control(ctdb, CTDB_CURRENT_NODE, 0, CTDB_CONTROL_CHECK_SRVIDS, 0,
527 indata, ctdb, &outdata, &status, NULL, NULL);
528 if (res != 0 || status != 0) {
529 DEBUG(DEBUG_ERR, (__location__ " failed to check srvids\n"));
530 return -1;
533 if (outdata.dsize != num*sizeof(uint8_t)) {
534 DEBUG(DEBUG_ERR, (__location__ " expected %lu bytes, received %zi bytes\n",
535 (long unsigned int)num*sizeof(uint8_t),
536 outdata.dsize));
537 talloc_free(outdata.dptr);
538 return -1;
541 for (i=0; i<num; i++) {
542 result[i] = outdata.dptr[i];
545 talloc_free(outdata.dptr);
546 return 0;
550 send a message - from client context
552 int ctdb_client_send_message(struct ctdb_context *ctdb, uint32_t pnn,
553 uint64_t srvid, TDB_DATA data)
555 struct ctdb_req_message *r;
556 int len, res;
558 len = offsetof(struct ctdb_req_message, data) + data.dsize;
559 r = ctdbd_allocate_pkt(ctdb, ctdb, CTDB_REQ_MESSAGE,
560 len, struct ctdb_req_message);
561 CTDB_NO_MEMORY(ctdb, r);
563 r->hdr.destnode = pnn;
564 r->srvid = srvid;
565 r->datalen = data.dsize;
566 memcpy(&r->data[0], data.dptr, data.dsize);
568 res = ctdb_client_queue_pkt(ctdb, &r->hdr);
569 talloc_free(r);
570 return res;
575 cancel a ctdb_fetch_lock operation, releasing the lock
577 static int fetch_lock_destructor(struct ctdb_record_handle *h)
579 ctdb_ltdb_unlock(h->ctdb_db, h->key);
580 return 0;
584 force the migration of a record to this node
586 static int ctdb_client_force_migration(struct ctdb_db_context *ctdb_db, TDB_DATA key)
588 struct ctdb_call call;
589 ZERO_STRUCT(call);
590 call.call_id = CTDB_NULL_FUNC;
591 call.key = key;
592 call.flags = CTDB_IMMEDIATE_MIGRATION;
593 return ctdb_call(ctdb_db, &call);
597 try to fetch a readonly copy of a record
599 static int
600 ctdb_client_fetch_readonly(struct ctdb_db_context *ctdb_db, TDB_DATA key, TALLOC_CTX *mem_ctx, struct ctdb_ltdb_header **hdr, TDB_DATA *data)
602 int ret;
604 struct ctdb_call call;
605 ZERO_STRUCT(call);
607 call.call_id = CTDB_FETCH_WITH_HEADER_FUNC;
608 call.call_data.dptr = NULL;
609 call.call_data.dsize = 0;
610 call.key = key;
611 call.flags = CTDB_WANT_READONLY;
612 ret = ctdb_call(ctdb_db, &call);
614 if (ret != 0) {
615 return -1;
617 if (call.reply_data.dsize < sizeof(struct ctdb_ltdb_header)) {
618 return -1;
621 *hdr = talloc_memdup(mem_ctx, &call.reply_data.dptr[0], sizeof(struct ctdb_ltdb_header));
622 if (*hdr == NULL) {
623 talloc_free(call.reply_data.dptr);
624 return -1;
627 data->dsize = call.reply_data.dsize - sizeof(struct ctdb_ltdb_header);
628 data->dptr = talloc_memdup(mem_ctx, &call.reply_data.dptr[sizeof(struct ctdb_ltdb_header)], data->dsize);
629 if (data->dptr == NULL) {
630 talloc_free(call.reply_data.dptr);
631 talloc_free(hdr);
632 return -1;
635 return 0;
639 get a lock on a record, and return the records data. Blocks until it gets the lock
641 struct ctdb_record_handle *ctdb_fetch_lock(struct ctdb_db_context *ctdb_db, TALLOC_CTX *mem_ctx,
642 TDB_DATA key, TDB_DATA *data)
644 int ret;
645 struct ctdb_record_handle *h;
648 procedure is as follows:
650 1) get the chain lock.
651 2) check if we are dmaster
652 3) if we are the dmaster then return handle
653 4) if not dmaster then ask ctdb daemon to make us dmaster, and wait for
654 reply from ctdbd
655 5) when we get the reply, goto (1)
658 h = talloc_zero(mem_ctx, struct ctdb_record_handle);
659 if (h == NULL) {
660 return NULL;
663 h->ctdb_db = ctdb_db;
664 h->key = key;
665 h->key.dptr = talloc_memdup(h, key.dptr, key.dsize);
666 if (h->key.dptr == NULL) {
667 talloc_free(h);
668 return NULL;
670 h->data = data;
672 DEBUG(DEBUG_DEBUG,("ctdb_fetch_lock: key=%*.*s\n", (int)key.dsize, (int)key.dsize,
673 (const char *)key.dptr));
675 again:
676 /* step 1 - get the chain lock */
677 ret = ctdb_ltdb_lock(ctdb_db, key);
678 if (ret != 0) {
679 DEBUG(DEBUG_ERR, (__location__ " failed to lock ltdb record\n"));
680 talloc_free(h);
681 return NULL;
684 DEBUG(DEBUG_DEBUG,("ctdb_fetch_lock: got chain lock\n"));
686 talloc_set_destructor(h, fetch_lock_destructor);
688 ret = ctdb_ltdb_fetch(ctdb_db, key, &h->header, h, data);
690 /* when torturing, ensure we test the remote path */
691 if ((ctdb_db->ctdb->flags & CTDB_FLAG_TORTURE) &&
692 random() % 5 == 0) {
693 h->header.dmaster = (uint32_t)-1;
697 DEBUG(DEBUG_DEBUG,("ctdb_fetch_lock: done local fetch\n"));
699 if (ret != 0 || h->header.dmaster != ctdb_db->ctdb->pnn) {
700 ctdb_ltdb_unlock(ctdb_db, key);
701 ret = ctdb_client_force_migration(ctdb_db, key);
702 if (ret != 0) {
703 DEBUG(DEBUG_DEBUG,("ctdb_fetch_lock: force_migration failed\n"));
704 talloc_free(h);
705 return NULL;
707 goto again;
710 /* if this is a request for read/write and we have delegations
711 we have to revoke all delegations first
713 if ((h->header.dmaster == ctdb_db->ctdb->pnn) &&
714 (h->header.flags & CTDB_REC_RO_HAVE_DELEGATIONS)) {
715 ctdb_ltdb_unlock(ctdb_db, key);
716 ret = ctdb_client_force_migration(ctdb_db, key);
717 if (ret != 0) {
718 DEBUG(DEBUG_DEBUG,("ctdb_fetch_readonly_lock: force_migration failed\n"));
719 talloc_free(h);
720 return NULL;
722 goto again;
725 DEBUG(DEBUG_DEBUG,("ctdb_fetch_lock: we are dmaster - done\n"));
726 return h;
730 get a readonly lock on a record, and return the records data. Blocks until it gets the lock
732 struct ctdb_record_handle *
733 ctdb_fetch_readonly_lock(
734 struct ctdb_db_context *ctdb_db, TALLOC_CTX *mem_ctx,
735 TDB_DATA key, TDB_DATA *data,
736 int read_only)
738 int ret;
739 struct ctdb_record_handle *h;
740 struct ctdb_ltdb_header *roheader = NULL;
742 h = talloc_zero(mem_ctx, struct ctdb_record_handle);
743 if (h == NULL) {
744 return NULL;
747 h->ctdb_db = ctdb_db;
748 h->key = key;
749 h->key.dptr = talloc_memdup(h, key.dptr, key.dsize);
750 if (h->key.dptr == NULL) {
751 talloc_free(h);
752 return NULL;
754 h->data = data;
756 data->dptr = NULL;
757 data->dsize = 0;
760 again:
761 talloc_free(roheader);
762 roheader = NULL;
764 talloc_free(data->dptr);
765 data->dptr = NULL;
766 data->dsize = 0;
768 /* Lock the record/chain */
769 ret = ctdb_ltdb_lock(ctdb_db, key);
770 if (ret != 0) {
771 DEBUG(DEBUG_ERR, (__location__ " failed to lock ltdb record\n"));
772 talloc_free(h);
773 return NULL;
776 talloc_set_destructor(h, fetch_lock_destructor);
778 /* Check if record exists yet in the TDB */
779 ret = ctdb_ltdb_fetch_with_header(ctdb_db, key, &h->header, h, data);
780 if (ret != 0) {
781 ctdb_ltdb_unlock(ctdb_db, key);
782 ret = ctdb_client_force_migration(ctdb_db, key);
783 if (ret != 0) {
784 DEBUG(DEBUG_DEBUG,("ctdb_fetch_readonly_lock: force_migration failed\n"));
785 talloc_free(h);
786 return NULL;
788 goto again;
791 /* if this is a request for read/write and we have delegations
792 we have to revoke all delegations first
794 if ((read_only == 0)
795 && (h->header.dmaster == ctdb_db->ctdb->pnn)
796 && (h->header.flags & CTDB_REC_RO_HAVE_DELEGATIONS)) {
797 ctdb_ltdb_unlock(ctdb_db, key);
798 ret = ctdb_client_force_migration(ctdb_db, key);
799 if (ret != 0) {
800 DEBUG(DEBUG_DEBUG,("ctdb_fetch_readonly_lock: force_migration failed\n"));
801 talloc_free(h);
802 return NULL;
804 goto again;
807 /* if we are dmaster, just return the handle */
808 if (h->header.dmaster == ctdb_db->ctdb->pnn) {
809 return h;
812 if (read_only != 0) {
813 TDB_DATA rodata = {NULL, 0};
815 if ((h->header.flags & CTDB_REC_RO_HAVE_READONLY)
816 || (h->header.flags & CTDB_REC_RO_HAVE_DELEGATIONS)) {
817 return h;
820 ctdb_ltdb_unlock(ctdb_db, key);
821 ret = ctdb_client_fetch_readonly(ctdb_db, key, h, &roheader, &rodata);
822 if (ret != 0) {
823 DEBUG(DEBUG_ERR,("ctdb_fetch_readonly_lock: failed. force migration and try again\n"));
824 ret = ctdb_client_force_migration(ctdb_db, key);
825 if (ret != 0) {
826 DEBUG(DEBUG_DEBUG,("ctdb_fetch_readonly_lock: force_migration failed\n"));
827 talloc_free(h);
828 return NULL;
831 goto again;
834 if (!(roheader->flags&CTDB_REC_RO_HAVE_READONLY)) {
835 ret = ctdb_client_force_migration(ctdb_db, key);
836 if (ret != 0) {
837 DEBUG(DEBUG_DEBUG,("ctdb_fetch_readonly_lock: force_migration failed\n"));
838 talloc_free(h);
839 return NULL;
842 goto again;
845 ret = ctdb_ltdb_lock(ctdb_db, key);
846 if (ret != 0) {
847 DEBUG(DEBUG_ERR, (__location__ " failed to lock ltdb record\n"));
848 talloc_free(h);
849 return NULL;
852 ret = ctdb_ltdb_fetch_with_header(ctdb_db, key, &h->header, h, data);
853 if (ret != 0) {
854 ctdb_ltdb_unlock(ctdb_db, key);
856 ret = ctdb_client_force_migration(ctdb_db, key);
857 if (ret != 0) {
858 DEBUG(DEBUG_DEBUG,("ctdb_fetch_readonly_lock: force_migration failed\n"));
859 talloc_free(h);
860 return NULL;
863 goto again;
866 return h;
869 /* we are not dmaster and this was not a request for a readonly lock
870 * so unlock the record, migrate it and try again
872 ctdb_ltdb_unlock(ctdb_db, key);
873 ret = ctdb_client_force_migration(ctdb_db, key);
874 if (ret != 0) {
875 DEBUG(DEBUG_DEBUG,("ctdb_fetch_lock: force_migration failed\n"));
876 talloc_free(h);
877 return NULL;
879 goto again;
883 store some data to the record that was locked with ctdb_fetch_lock()
885 int ctdb_record_store(struct ctdb_record_handle *h, TDB_DATA data)
887 if (h->ctdb_db->persistent) {
888 DEBUG(DEBUG_ERR, (__location__ " ctdb_record_store prohibited for persistent dbs\n"));
889 return -1;
892 return ctdb_ltdb_store(h->ctdb_db, h->key, &h->header, data);
896 non-locking fetch of a record
898 int ctdb_fetch(struct ctdb_db_context *ctdb_db, TALLOC_CTX *mem_ctx,
899 TDB_DATA key, TDB_DATA *data)
901 struct ctdb_call call;
902 int ret;
904 call.call_id = CTDB_FETCH_FUNC;
905 call.call_data.dptr = NULL;
906 call.call_data.dsize = 0;
907 call.key = key;
909 ret = ctdb_call(ctdb_db, &call);
911 if (ret == 0) {
912 *data = call.reply_data;
913 talloc_steal(mem_ctx, data->dptr);
916 return ret;
922 called when a control completes or timesout to invoke the callback
923 function the user provided
925 static void invoke_control_callback(struct event_context *ev, struct timed_event *te,
926 struct timeval t, void *private_data)
928 struct ctdb_client_control_state *state;
929 TALLOC_CTX *tmp_ctx = talloc_new(NULL);
930 int ret;
932 state = talloc_get_type(private_data, struct ctdb_client_control_state);
933 talloc_steal(tmp_ctx, state);
935 ret = ctdb_control_recv(state->ctdb, state, state,
936 NULL,
937 NULL,
938 NULL);
939 if (ret != 0) {
940 DEBUG(DEBUG_DEBUG,("ctdb_control_recv() failed, ignoring return code %d\n", ret));
943 talloc_free(tmp_ctx);
947 called when a CTDB_REPLY_CONTROL packet comes in in the client
949 This packet comes in response to a CTDB_REQ_CONTROL request packet. It
950 contains any reply data from the control
952 static void ctdb_client_reply_control(struct ctdb_context *ctdb,
953 struct ctdb_req_header *hdr)
955 struct ctdb_reply_control *c = (struct ctdb_reply_control *)hdr;
956 struct ctdb_client_control_state *state;
958 state = ctdb_reqid_find(ctdb, hdr->reqid, struct ctdb_client_control_state);
959 if (state == NULL) {
960 DEBUG(DEBUG_ERR,(__location__ " reqid %u not found\n", hdr->reqid));
961 return;
964 if (hdr->reqid != state->reqid) {
965 /* we found a record but it was the wrong one */
966 DEBUG(DEBUG_ERR, ("Dropped orphaned reply control with reqid:%u\n",hdr->reqid));
967 return;
970 state->outdata.dptr = c->data;
971 state->outdata.dsize = c->datalen;
972 state->status = c->status;
973 if (c->errorlen) {
974 state->errormsg = talloc_strndup(state,
975 (char *)&c->data[c->datalen],
976 c->errorlen);
979 /* state->outdata now uses resources from c so we dont want c
980 to just dissappear from under us while state is still alive
982 talloc_steal(state, c);
984 state->state = CTDB_CONTROL_DONE;
986 /* if we had a callback registered for this control, pull the response
987 and call the callback.
989 if (state->async.fn) {
990 event_add_timed(ctdb->ev, state, timeval_zero(), invoke_control_callback, state);
996 destroy a ctdb_control in client
998 static int ctdb_client_control_destructor(struct ctdb_client_control_state *state)
1000 ctdb_reqid_remove(state->ctdb, state->reqid);
1001 return 0;
1005 /* time out handler for ctdb_control */
1006 static void control_timeout_func(struct event_context *ev, struct timed_event *te,
1007 struct timeval t, void *private_data)
1009 struct ctdb_client_control_state *state = talloc_get_type(private_data, struct ctdb_client_control_state);
1011 DEBUG(DEBUG_ERR,(__location__ " control timed out. reqid:%u opcode:%u "
1012 "dstnode:%u\n", state->reqid, state->c->opcode,
1013 state->c->hdr.destnode));
1015 state->state = CTDB_CONTROL_TIMEOUT;
1017 /* if we had a callback registered for this control, pull the response
1018 and call the callback.
1020 if (state->async.fn) {
1021 event_add_timed(state->ctdb->ev, state, timeval_zero(), invoke_control_callback, state);
1025 /* async version of send control request */
1026 struct ctdb_client_control_state *ctdb_control_send(struct ctdb_context *ctdb,
1027 uint32_t destnode, uint64_t srvid,
1028 uint32_t opcode, uint32_t flags, TDB_DATA data,
1029 TALLOC_CTX *mem_ctx,
1030 struct timeval *timeout,
1031 char **errormsg)
1033 struct ctdb_client_control_state *state;
1034 size_t len;
1035 struct ctdb_req_control *c;
1036 int ret;
1038 if (errormsg) {
1039 *errormsg = NULL;
1042 /* if the domain socket is not yet open, open it */
1043 if (ctdb->daemon.sd==-1) {
1044 ctdb_socket_connect(ctdb);
1047 state = talloc_zero(mem_ctx, struct ctdb_client_control_state);
1048 CTDB_NO_MEMORY_NULL(ctdb, state);
1050 state->ctdb = ctdb;
1051 state->reqid = ctdb_reqid_new(ctdb, state);
1052 state->state = CTDB_CONTROL_WAIT;
1053 state->errormsg = NULL;
1055 talloc_set_destructor(state, ctdb_client_control_destructor);
1057 len = offsetof(struct ctdb_req_control, data) + data.dsize;
1058 c = ctdbd_allocate_pkt(ctdb, state, CTDB_REQ_CONTROL,
1059 len, struct ctdb_req_control);
1060 state->c = c;
1061 CTDB_NO_MEMORY_NULL(ctdb, c);
1062 c->hdr.reqid = state->reqid;
1063 c->hdr.destnode = destnode;
1064 c->opcode = opcode;
1065 c->client_id = 0;
1066 c->flags = flags;
1067 c->srvid = srvid;
1068 c->datalen = data.dsize;
1069 if (data.dsize) {
1070 memcpy(&c->data[0], data.dptr, data.dsize);
1073 /* timeout */
1074 if (timeout && !timeval_is_zero(timeout)) {
1075 event_add_timed(ctdb->ev, state, *timeout, control_timeout_func, state);
1078 ret = ctdb_client_queue_pkt(ctdb, &(c->hdr));
1079 if (ret != 0) {
1080 talloc_free(state);
1081 return NULL;
1084 if (flags & CTDB_CTRL_FLAG_NOREPLY) {
1085 talloc_free(state);
1086 return NULL;
1089 return state;
1093 /* async version of receive control reply */
1094 int ctdb_control_recv(struct ctdb_context *ctdb,
1095 struct ctdb_client_control_state *state,
1096 TALLOC_CTX *mem_ctx,
1097 TDB_DATA *outdata, int32_t *status, char **errormsg)
1099 TALLOC_CTX *tmp_ctx;
1101 if (status != NULL) {
1102 *status = -1;
1104 if (errormsg != NULL) {
1105 *errormsg = NULL;
1108 if (state == NULL) {
1109 return -1;
1112 /* prevent double free of state */
1113 tmp_ctx = talloc_new(ctdb);
1114 talloc_steal(tmp_ctx, state);
1116 /* loop one event at a time until we either timeout or the control
1117 completes.
1119 while (state->state == CTDB_CONTROL_WAIT) {
1120 event_loop_once(ctdb->ev);
1123 if (state->state != CTDB_CONTROL_DONE) {
1124 DEBUG(DEBUG_ERR,(__location__ " ctdb_control_recv failed\n"));
1125 if (state->async.fn) {
1126 state->async.fn(state);
1128 talloc_free(tmp_ctx);
1129 return -1;
1132 if (state->errormsg) {
1133 DEBUG(DEBUG_ERR,("ctdb_control error: '%s'\n", state->errormsg));
1134 if (errormsg) {
1135 (*errormsg) = talloc_move(mem_ctx, &state->errormsg);
1137 if (state->async.fn) {
1138 state->async.fn(state);
1140 talloc_free(tmp_ctx);
1141 return -1;
1144 if (outdata) {
1145 *outdata = state->outdata;
1146 outdata->dptr = talloc_memdup(mem_ctx, outdata->dptr, outdata->dsize);
1149 if (status) {
1150 *status = state->status;
1153 if (state->async.fn) {
1154 state->async.fn(state);
1157 talloc_free(tmp_ctx);
1158 return 0;
1164 send a ctdb control message
1165 timeout specifies how long we should wait for a reply.
1166 if timeout is NULL we wait indefinitely
1168 int ctdb_control(struct ctdb_context *ctdb, uint32_t destnode, uint64_t srvid,
1169 uint32_t opcode, uint32_t flags, TDB_DATA data,
1170 TALLOC_CTX *mem_ctx, TDB_DATA *outdata, int32_t *status,
1171 struct timeval *timeout,
1172 char **errormsg)
1174 struct ctdb_client_control_state *state;
1176 state = ctdb_control_send(ctdb, destnode, srvid, opcode,
1177 flags, data, mem_ctx,
1178 timeout, errormsg);
1180 /* FIXME: Error conditions in ctdb_control_send return NULL without
1181 * setting errormsg. So, there is no way to distinguish between sucess
1182 * and failure when CTDB_CTRL_FLAG_NOREPLY is set */
1183 if (flags & CTDB_CTRL_FLAG_NOREPLY) {
1184 if (status != NULL) {
1185 *status = 0;
1187 return 0;
1190 return ctdb_control_recv(ctdb, state, mem_ctx, outdata, status,
1191 errormsg);
1198 a process exists call. Returns 0 if process exists, -1 otherwise
1200 int ctdb_ctrl_process_exists(struct ctdb_context *ctdb, uint32_t destnode, pid_t pid)
1202 int ret;
1203 TDB_DATA data;
1204 int32_t status;
1206 data.dptr = (uint8_t*)&pid;
1207 data.dsize = sizeof(pid);
1209 ret = ctdb_control(ctdb, destnode, 0,
1210 CTDB_CONTROL_PROCESS_EXISTS, 0, data,
1211 NULL, NULL, &status, NULL, NULL);
1212 if (ret != 0) {
1213 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for process_exists failed\n"));
1214 return -1;
1217 return status;
1221 get remote statistics
1223 int ctdb_ctrl_statistics(struct ctdb_context *ctdb, uint32_t destnode, struct ctdb_statistics *status)
1225 int ret;
1226 TDB_DATA data;
1227 int32_t res;
1229 ret = ctdb_control(ctdb, destnode, 0,
1230 CTDB_CONTROL_STATISTICS, 0, tdb_null,
1231 ctdb, &data, &res, NULL, NULL);
1232 if (ret != 0 || res != 0) {
1233 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for statistics failed\n"));
1234 return -1;
1237 if (data.dsize != sizeof(struct ctdb_statistics)) {
1238 DEBUG(DEBUG_ERR,(__location__ " Wrong statistics size %u - expected %u\n",
1239 (unsigned)data.dsize, (unsigned)sizeof(struct ctdb_statistics)));
1240 return -1;
1243 *status = *(struct ctdb_statistics *)data.dptr;
1244 talloc_free(data.dptr);
1246 return 0;
1250 * get db statistics
1252 int ctdb_ctrl_dbstatistics(struct ctdb_context *ctdb, uint32_t destnode, uint32_t dbid,
1253 TALLOC_CTX *mem_ctx, struct ctdb_db_statistics **dbstat)
1255 int ret;
1256 TDB_DATA indata, outdata;
1257 int32_t res;
1258 struct ctdb_db_statistics *wire, *s;
1259 char *ptr;
1260 int i;
1262 indata.dptr = (uint8_t *)&dbid;
1263 indata.dsize = sizeof(dbid);
1265 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_GET_DB_STATISTICS,
1266 0, indata, ctdb, &outdata, &res, NULL, NULL);
1267 if (ret != 0 || res != 0) {
1268 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for dbstatistics failed\n"));
1269 return -1;
1272 if (outdata.dsize < offsetof(struct ctdb_db_statistics, hot_keys_wire)) {
1273 DEBUG(DEBUG_ERR,(__location__ " Wrong dbstatistics size %zi - expected >= %lu\n",
1274 outdata.dsize,
1275 (long unsigned int)sizeof(struct ctdb_statistics)));
1276 return -1;
1279 s = talloc_zero(mem_ctx, struct ctdb_db_statistics);
1280 if (s == NULL) {
1281 talloc_free(outdata.dptr);
1282 CTDB_NO_MEMORY(ctdb, s);
1285 wire = (struct ctdb_db_statistics *)outdata.dptr;
1286 *s = *wire;
1287 ptr = &wire->hot_keys_wire[0];
1288 for (i=0; i<wire->num_hot_keys; i++) {
1289 s->hot_keys[i].key.dptr = talloc_size(mem_ctx, s->hot_keys[i].key.dsize);
1290 if (s->hot_keys[i].key.dptr == NULL) {
1291 talloc_free(outdata.dptr);
1292 CTDB_NO_MEMORY(ctdb, s->hot_keys[i].key.dptr);
1295 memcpy(s->hot_keys[i].key.dptr, ptr, s->hot_keys[i].key.dsize);
1296 ptr += wire->hot_keys[i].key.dsize;
1299 talloc_free(outdata.dptr);
1300 *dbstat = s;
1301 return 0;
1305 shutdown a remote ctdb node
1307 int ctdb_ctrl_shutdown(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode)
1309 struct ctdb_client_control_state *state;
1311 state = ctdb_control_send(ctdb, destnode, 0,
1312 CTDB_CONTROL_SHUTDOWN, 0, tdb_null,
1313 NULL, &timeout, NULL);
1314 if (state == NULL) {
1315 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for shutdown failed\n"));
1316 return -1;
1319 return 0;
1323 get vnn map from a remote node
1325 int ctdb_ctrl_getvnnmap(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, TALLOC_CTX *mem_ctx, struct ctdb_vnn_map **vnnmap)
1327 int ret;
1328 TDB_DATA outdata;
1329 int32_t res;
1330 struct ctdb_vnn_map_wire *map;
1332 ret = ctdb_control(ctdb, destnode, 0,
1333 CTDB_CONTROL_GETVNNMAP, 0, tdb_null,
1334 mem_ctx, &outdata, &res, &timeout, NULL);
1335 if (ret != 0 || res != 0) {
1336 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getvnnmap failed\n"));
1337 return -1;
1340 map = (struct ctdb_vnn_map_wire *)outdata.dptr;
1341 if (outdata.dsize < offsetof(struct ctdb_vnn_map_wire, map) ||
1342 outdata.dsize != map->size*sizeof(uint32_t) + offsetof(struct ctdb_vnn_map_wire, map)) {
1343 DEBUG(DEBUG_ERR,("Bad vnn map size received in ctdb_ctrl_getvnnmap\n"));
1344 return -1;
1347 (*vnnmap) = talloc(mem_ctx, struct ctdb_vnn_map);
1348 CTDB_NO_MEMORY(ctdb, *vnnmap);
1349 (*vnnmap)->generation = map->generation;
1350 (*vnnmap)->size = map->size;
1351 (*vnnmap)->map = talloc_array(*vnnmap, uint32_t, map->size);
1353 CTDB_NO_MEMORY(ctdb, (*vnnmap)->map);
1354 memcpy((*vnnmap)->map, map->map, sizeof(uint32_t)*map->size);
1355 talloc_free(outdata.dptr);
1357 return 0;
1362 get the recovery mode of a remote node
1364 struct ctdb_client_control_state *
1365 ctdb_ctrl_getrecmode_send(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct timeval timeout, uint32_t destnode)
1367 return ctdb_control_send(ctdb, destnode, 0,
1368 CTDB_CONTROL_GET_RECMODE, 0, tdb_null,
1369 mem_ctx, &timeout, NULL);
1372 int ctdb_ctrl_getrecmode_recv(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct ctdb_client_control_state *state, uint32_t *recmode)
1374 int ret;
1375 int32_t res;
1377 ret = ctdb_control_recv(ctdb, state, mem_ctx, NULL, &res, NULL);
1378 if (ret != 0) {
1379 DEBUG(DEBUG_ERR,(__location__ " ctdb_ctrl_getrecmode_recv failed\n"));
1380 return -1;
1383 if (recmode) {
1384 *recmode = (uint32_t)res;
1387 return 0;
1390 int ctdb_ctrl_getrecmode(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct timeval timeout, uint32_t destnode, uint32_t *recmode)
1392 struct ctdb_client_control_state *state;
1394 state = ctdb_ctrl_getrecmode_send(ctdb, mem_ctx, timeout, destnode);
1395 return ctdb_ctrl_getrecmode_recv(ctdb, mem_ctx, state, recmode);
1402 set the recovery mode of a remote node
1404 int ctdb_ctrl_setrecmode(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t recmode)
1406 int ret;
1407 TDB_DATA data;
1408 int32_t res;
1410 data.dsize = sizeof(uint32_t);
1411 data.dptr = (unsigned char *)&recmode;
1413 ret = ctdb_control(ctdb, destnode, 0,
1414 CTDB_CONTROL_SET_RECMODE, 0, data,
1415 NULL, NULL, &res, &timeout, NULL);
1416 if (ret != 0 || res != 0) {
1417 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for setrecmode failed\n"));
1418 return -1;
1421 return 0;
1427 get the recovery master of a remote node
1429 struct ctdb_client_control_state *
1430 ctdb_ctrl_getrecmaster_send(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx,
1431 struct timeval timeout, uint32_t destnode)
1433 return ctdb_control_send(ctdb, destnode, 0,
1434 CTDB_CONTROL_GET_RECMASTER, 0, tdb_null,
1435 mem_ctx, &timeout, NULL);
1438 int ctdb_ctrl_getrecmaster_recv(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct ctdb_client_control_state *state, uint32_t *recmaster)
1440 int ret;
1441 int32_t res;
1443 ret = ctdb_control_recv(ctdb, state, mem_ctx, NULL, &res, NULL);
1444 if (ret != 0) {
1445 DEBUG(DEBUG_ERR,(__location__ " ctdb_ctrl_getrecmaster_recv failed\n"));
1446 return -1;
1449 if (recmaster) {
1450 *recmaster = (uint32_t)res;
1453 return 0;
1456 int ctdb_ctrl_getrecmaster(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct timeval timeout, uint32_t destnode, uint32_t *recmaster)
1458 struct ctdb_client_control_state *state;
1460 state = ctdb_ctrl_getrecmaster_send(ctdb, mem_ctx, timeout, destnode);
1461 return ctdb_ctrl_getrecmaster_recv(ctdb, mem_ctx, state, recmaster);
1466 set the recovery master of a remote node
1468 int ctdb_ctrl_setrecmaster(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t recmaster)
1470 int ret;
1471 TDB_DATA data;
1472 int32_t res;
1474 ZERO_STRUCT(data);
1475 data.dsize = sizeof(uint32_t);
1476 data.dptr = (unsigned char *)&recmaster;
1478 ret = ctdb_control(ctdb, destnode, 0,
1479 CTDB_CONTROL_SET_RECMASTER, 0, data,
1480 NULL, NULL, &res, &timeout, NULL);
1481 if (ret != 0 || res != 0) {
1482 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for setrecmaster failed\n"));
1483 return -1;
1486 return 0;
1491 get a list of databases off a remote node
1493 int ctdb_ctrl_getdbmap(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode,
1494 TALLOC_CTX *mem_ctx, struct ctdb_dbid_map **dbmap)
1496 int ret;
1497 TDB_DATA outdata;
1498 int32_t res;
1500 ret = ctdb_control(ctdb, destnode, 0,
1501 CTDB_CONTROL_GET_DBMAP, 0, tdb_null,
1502 mem_ctx, &outdata, &res, &timeout, NULL);
1503 if (ret != 0 || res != 0) {
1504 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getdbmap failed ret:%d res:%d\n", ret, res));
1505 return -1;
1508 *dbmap = (struct ctdb_dbid_map *)talloc_memdup(mem_ctx, outdata.dptr, outdata.dsize);
1509 talloc_free(outdata.dptr);
1511 return 0;
1515 get a list of nodes (vnn and flags ) from a remote node
1517 int ctdb_ctrl_getnodemap(struct ctdb_context *ctdb,
1518 struct timeval timeout, uint32_t destnode,
1519 TALLOC_CTX *mem_ctx, struct ctdb_node_map **nodemap)
1521 int ret;
1522 TDB_DATA outdata;
1523 int32_t res;
1525 ret = ctdb_control(ctdb, destnode, 0,
1526 CTDB_CONTROL_GET_NODEMAP, 0, tdb_null,
1527 mem_ctx, &outdata, &res, &timeout, NULL);
1528 if (ret != 0 || res != 0 || outdata.dsize == 0) {
1529 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getnodes failed ret:%d res:%d\n", ret, res));
1530 return -1;
1533 *nodemap = (struct ctdb_node_map *)talloc_memdup(mem_ctx, outdata.dptr, outdata.dsize);
1534 talloc_free(outdata.dptr);
1535 return 0;
1539 load nodes file on a remote node and return as a node map
1541 int ctdb_ctrl_getnodesfile(struct ctdb_context *ctdb,
1542 struct timeval timeout, uint32_t destnode,
1543 TALLOC_CTX *mem_ctx, struct ctdb_node_map **nodemap)
1545 int ret;
1546 TDB_DATA outdata;
1547 int32_t res;
1549 ret = ctdb_control(ctdb, destnode, 0,
1550 CTDB_CONTROL_GET_NODES_FILE, 0, tdb_null,
1551 mem_ctx, &outdata, &res, &timeout, NULL);
1552 if (ret != 0 || res != 0 || outdata.dsize == 0) {
1553 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getnodes failed ret:%d res:%d\n", ret, res));
1554 return -1;
1557 *nodemap = (struct ctdb_node_map *)talloc_memdup(mem_ctx, outdata.dptr, outdata.dsize);
1558 talloc_free(outdata.dptr);
1560 return 0;
1564 drop the transport, reload the nodes file and restart the transport
1566 int ctdb_ctrl_reload_nodes_file(struct ctdb_context *ctdb,
1567 struct timeval timeout, uint32_t destnode)
1569 int ret;
1570 int32_t res;
1572 ret = ctdb_control(ctdb, destnode, 0,
1573 CTDB_CONTROL_RELOAD_NODES_FILE, 0, tdb_null,
1574 NULL, NULL, &res, &timeout, NULL);
1575 if (ret != 0 || res != 0) {
1576 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for reloadnodesfile failed\n"));
1577 return -1;
1580 return 0;
1585 set vnn map on a node
1587 int ctdb_ctrl_setvnnmap(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode,
1588 TALLOC_CTX *mem_ctx, struct ctdb_vnn_map *vnnmap)
1590 int ret;
1591 TDB_DATA data;
1592 int32_t res;
1593 struct ctdb_vnn_map_wire *map;
1594 size_t len;
1596 len = offsetof(struct ctdb_vnn_map_wire, map) + sizeof(uint32_t)*vnnmap->size;
1597 map = talloc_size(mem_ctx, len);
1598 CTDB_NO_MEMORY(ctdb, map);
1600 map->generation = vnnmap->generation;
1601 map->size = vnnmap->size;
1602 memcpy(map->map, vnnmap->map, sizeof(uint32_t)*map->size);
1604 data.dsize = len;
1605 data.dptr = (uint8_t *)map;
1607 ret = ctdb_control(ctdb, destnode, 0,
1608 CTDB_CONTROL_SETVNNMAP, 0, data,
1609 NULL, NULL, &res, &timeout, NULL);
1610 if (ret != 0 || res != 0) {
1611 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for setvnnmap failed\n"));
1612 return -1;
1615 talloc_free(map);
1617 return 0;
1622 async send for pull database
1624 struct ctdb_client_control_state *ctdb_ctrl_pulldb_send(
1625 struct ctdb_context *ctdb, uint32_t destnode, uint32_t dbid,
1626 uint32_t lmaster, TALLOC_CTX *mem_ctx, struct timeval timeout)
1628 TDB_DATA indata;
1629 struct ctdb_control_pulldb *pull;
1630 struct ctdb_client_control_state *state;
1632 pull = talloc(mem_ctx, struct ctdb_control_pulldb);
1633 CTDB_NO_MEMORY_NULL(ctdb, pull);
1635 pull->db_id = dbid;
1636 pull->lmaster = lmaster;
1638 indata.dsize = sizeof(struct ctdb_control_pulldb);
1639 indata.dptr = (unsigned char *)pull;
1641 state = ctdb_control_send(ctdb, destnode, 0,
1642 CTDB_CONTROL_PULL_DB, 0, indata,
1643 mem_ctx, &timeout, NULL);
1644 talloc_free(pull);
1646 return state;
1650 async recv for pull database
1652 int ctdb_ctrl_pulldb_recv(
1653 struct ctdb_context *ctdb,
1654 TALLOC_CTX *mem_ctx, struct ctdb_client_control_state *state,
1655 TDB_DATA *outdata)
1657 int ret;
1658 int32_t res;
1660 ret = ctdb_control_recv(ctdb, state, mem_ctx, outdata, &res, NULL);
1661 if ( (ret != 0) || (res != 0) ){
1662 DEBUG(DEBUG_ERR,(__location__ " ctdb_ctrl_pulldb_recv failed\n"));
1663 return -1;
1666 return 0;
1670 pull all keys and records for a specific database on a node
1672 int ctdb_ctrl_pulldb(struct ctdb_context *ctdb, uint32_t destnode,
1673 uint32_t dbid, uint32_t lmaster,
1674 TALLOC_CTX *mem_ctx, struct timeval timeout,
1675 TDB_DATA *outdata)
1677 struct ctdb_client_control_state *state;
1679 state = ctdb_ctrl_pulldb_send(ctdb, destnode, dbid, lmaster, mem_ctx,
1680 timeout);
1682 return ctdb_ctrl_pulldb_recv(ctdb, mem_ctx, state, outdata);
1687 change dmaster for all keys in the database to the new value
1689 int ctdb_ctrl_setdmaster(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode,
1690 TALLOC_CTX *mem_ctx, uint32_t dbid, uint32_t dmaster)
1692 int ret;
1693 TDB_DATA indata;
1694 int32_t res;
1696 indata.dsize = 2*sizeof(uint32_t);
1697 indata.dptr = (unsigned char *)talloc_array(mem_ctx, uint32_t, 2);
1699 ((uint32_t *)(&indata.dptr[0]))[0] = dbid;
1700 ((uint32_t *)(&indata.dptr[0]))[1] = dmaster;
1702 ret = ctdb_control(ctdb, destnode, 0,
1703 CTDB_CONTROL_SET_DMASTER, 0, indata,
1704 NULL, NULL, &res, &timeout, NULL);
1705 if (ret != 0 || res != 0) {
1706 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for setdmaster failed\n"));
1707 return -1;
1710 return 0;
1714 ping a node, return number of clients connected
1716 int ctdb_ctrl_ping(struct ctdb_context *ctdb, uint32_t destnode)
1718 int ret;
1719 int32_t res;
1721 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_PING, 0,
1722 tdb_null, NULL, NULL, &res, NULL, NULL);
1723 if (ret != 0) {
1724 return -1;
1726 return res;
1729 int ctdb_ctrl_get_runstate(struct ctdb_context *ctdb,
1730 struct timeval timeout,
1731 uint32_t destnode,
1732 uint32_t *runstate)
1734 TDB_DATA outdata;
1735 int32_t res;
1736 int ret;
1738 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_GET_RUNSTATE, 0,
1739 tdb_null, ctdb, &outdata, &res, &timeout, NULL);
1740 if (ret != 0 || res != 0) {
1741 DEBUG(DEBUG_ERR,("ctdb_control for get_runstate failed\n"));
1742 return ret != 0 ? ret : res;
1745 if (outdata.dsize != sizeof(uint32_t)) {
1746 DEBUG(DEBUG_ERR,("Invalid return data in get_runstate\n"));
1747 talloc_free(outdata.dptr);
1748 return -1;
1751 if (runstate != NULL) {
1752 *runstate = *(uint32_t *)outdata.dptr;
1754 talloc_free(outdata.dptr);
1756 return 0;
1760 find the real path to a ltdb
1762 int ctdb_ctrl_getdbpath(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t dbid, TALLOC_CTX *mem_ctx,
1763 const char **path)
1765 int ret;
1766 int32_t res;
1767 TDB_DATA data;
1769 data.dptr = (uint8_t *)&dbid;
1770 data.dsize = sizeof(dbid);
1772 ret = ctdb_control(ctdb, destnode, 0,
1773 CTDB_CONTROL_GETDBPATH, 0, data,
1774 mem_ctx, &data, &res, &timeout, NULL);
1775 if (ret != 0 || res != 0) {
1776 return -1;
1779 (*path) = talloc_strndup(mem_ctx, (const char *)data.dptr, data.dsize);
1780 if ((*path) == NULL) {
1781 return -1;
1784 talloc_free(data.dptr);
1786 return 0;
1790 find the name of a db
1792 int ctdb_ctrl_getdbname(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t dbid, TALLOC_CTX *mem_ctx,
1793 const char **name)
1795 int ret;
1796 int32_t res;
1797 TDB_DATA data;
1799 data.dptr = (uint8_t *)&dbid;
1800 data.dsize = sizeof(dbid);
1802 ret = ctdb_control(ctdb, destnode, 0,
1803 CTDB_CONTROL_GET_DBNAME, 0, data,
1804 mem_ctx, &data, &res, &timeout, NULL);
1805 if (ret != 0 || res != 0) {
1806 return -1;
1809 (*name) = talloc_strndup(mem_ctx, (const char *)data.dptr, data.dsize);
1810 if ((*name) == NULL) {
1811 return -1;
1814 talloc_free(data.dptr);
1816 return 0;
1820 get the health status of a db
1822 int ctdb_ctrl_getdbhealth(struct ctdb_context *ctdb,
1823 struct timeval timeout,
1824 uint32_t destnode,
1825 uint32_t dbid, TALLOC_CTX *mem_ctx,
1826 const char **reason)
1828 int ret;
1829 int32_t res;
1830 TDB_DATA data;
1832 data.dptr = (uint8_t *)&dbid;
1833 data.dsize = sizeof(dbid);
1835 ret = ctdb_control(ctdb, destnode, 0,
1836 CTDB_CONTROL_DB_GET_HEALTH, 0, data,
1837 mem_ctx, &data, &res, &timeout, NULL);
1838 if (ret != 0 || res != 0) {
1839 return -1;
1842 if (data.dsize == 0) {
1843 (*reason) = NULL;
1844 return 0;
1847 (*reason) = talloc_strndup(mem_ctx, (const char *)data.dptr, data.dsize);
1848 if ((*reason) == NULL) {
1849 return -1;
1852 talloc_free(data.dptr);
1854 return 0;
1858 * get db sequence number
1860 int ctdb_ctrl_getdbseqnum(struct ctdb_context *ctdb, struct timeval timeout,
1861 uint32_t destnode, uint32_t dbid, uint64_t *seqnum)
1863 int ret;
1864 int32_t res;
1865 TDB_DATA data, outdata;
1867 data.dptr = (uint8_t *)&dbid;
1868 data.dsize = sizeof(uint64_t); /* This is just wrong */
1870 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_GET_DB_SEQNUM,
1871 0, data, ctdb, &outdata, &res, &timeout, NULL);
1872 if (ret != 0 || res != 0) {
1873 DEBUG(DEBUG_ERR,("ctdb_control for getdbesqnum failed\n"));
1874 return -1;
1877 if (outdata.dsize != sizeof(uint64_t)) {
1878 DEBUG(DEBUG_ERR,("Invalid return data in get_dbseqnum\n"));
1879 talloc_free(outdata.dptr);
1880 return -1;
1883 if (seqnum != NULL) {
1884 *seqnum = *(uint64_t *)outdata.dptr;
1886 talloc_free(outdata.dptr);
1888 return 0;
1892 create a database
1894 int ctdb_ctrl_createdb(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode,
1895 TALLOC_CTX *mem_ctx, const char *name, bool persistent)
1897 int ret;
1898 int32_t res;
1899 TDB_DATA data;
1900 uint64_t tdb_flags = 0;
1902 data.dptr = discard_const(name);
1903 data.dsize = strlen(name)+1;
1905 /* Make sure that volatile databases use jenkins hash */
1906 if (!persistent) {
1907 tdb_flags = TDB_INCOMPATIBLE_HASH;
1910 #ifdef TDB_MUTEX_LOCKING
1911 if (!persistent && ctdb->tunable.mutex_enabled == 1) {
1912 tdb_flags |= (TDB_MUTEX_LOCKING | TDB_CLEAR_IF_FIRST);
1914 #endif
1916 ret = ctdb_control(ctdb, destnode, tdb_flags,
1917 persistent?CTDB_CONTROL_DB_ATTACH_PERSISTENT:CTDB_CONTROL_DB_ATTACH,
1918 0, data,
1919 mem_ctx, &data, &res, &timeout, NULL);
1921 if (ret != 0 || res != 0) {
1922 return -1;
1925 return 0;
1929 get debug level on a node
1931 int ctdb_ctrl_get_debuglevel(struct ctdb_context *ctdb, uint32_t destnode, int32_t *level)
1933 int ret;
1934 int32_t res;
1935 TDB_DATA data;
1937 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_GET_DEBUG, 0, tdb_null,
1938 ctdb, &data, &res, NULL, NULL);
1939 if (ret != 0 || res != 0) {
1940 return -1;
1942 if (data.dsize != sizeof(int32_t)) {
1943 DEBUG(DEBUG_ERR,("Bad control reply size in ctdb_get_debuglevel (got %u)\n",
1944 (unsigned)data.dsize));
1945 return -1;
1947 *level = *(int32_t *)data.dptr;
1948 talloc_free(data.dptr);
1949 return 0;
1953 set debug level on a node
1955 int ctdb_ctrl_set_debuglevel(struct ctdb_context *ctdb, uint32_t destnode, int32_t level)
1957 int ret;
1958 int32_t res;
1959 TDB_DATA data;
1961 data.dptr = (uint8_t *)&level;
1962 data.dsize = sizeof(level);
1964 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_SET_DEBUG, 0, data,
1965 NULL, NULL, &res, NULL, NULL);
1966 if (ret != 0 || res != 0) {
1967 return -1;
1969 return 0;
1974 get a list of connected nodes
1976 uint32_t *ctdb_get_connected_nodes(struct ctdb_context *ctdb,
1977 struct timeval timeout,
1978 TALLOC_CTX *mem_ctx,
1979 uint32_t *num_nodes)
1981 struct ctdb_node_map *map=NULL;
1982 int ret, i;
1983 uint32_t *nodes;
1985 *num_nodes = 0;
1987 ret = ctdb_ctrl_getnodemap(ctdb, timeout, CTDB_CURRENT_NODE, mem_ctx, &map);
1988 if (ret != 0) {
1989 return NULL;
1992 nodes = talloc_array(mem_ctx, uint32_t, map->num);
1993 if (nodes == NULL) {
1994 return NULL;
1997 for (i=0;i<map->num;i++) {
1998 if (!(map->nodes[i].flags & NODE_FLAGS_DISCONNECTED)) {
1999 nodes[*num_nodes] = map->nodes[i].pnn;
2000 (*num_nodes)++;
2004 return nodes;
2009 reset remote status
2011 int ctdb_statistics_reset(struct ctdb_context *ctdb, uint32_t destnode)
2013 int ret;
2014 int32_t res;
2016 ret = ctdb_control(ctdb, destnode, 0,
2017 CTDB_CONTROL_STATISTICS_RESET, 0, tdb_null,
2018 NULL, NULL, &res, NULL, NULL);
2019 if (ret != 0 || res != 0) {
2020 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for reset statistics failed\n"));
2021 return -1;
2023 return 0;
2027 attach to a specific database - client call
2029 struct ctdb_db_context *ctdb_attach(struct ctdb_context *ctdb,
2030 struct timeval timeout,
2031 const char *name,
2032 bool persistent,
2033 uint32_t tdb_flags)
2035 struct ctdb_db_context *ctdb_db;
2036 TDB_DATA data;
2037 int ret;
2038 int32_t res;
2039 #ifdef TDB_MUTEX_LOCKING
2040 uint32_t mutex_enabled = 0;
2041 #endif
2043 ctdb_db = ctdb_db_handle(ctdb, name);
2044 if (ctdb_db) {
2045 return ctdb_db;
2048 ctdb_db = talloc_zero(ctdb, struct ctdb_db_context);
2049 CTDB_NO_MEMORY_NULL(ctdb, ctdb_db);
2051 ctdb_db->ctdb = ctdb;
2052 ctdb_db->db_name = talloc_strdup(ctdb_db, name);
2053 CTDB_NO_MEMORY_NULL(ctdb, ctdb_db->db_name);
2055 data.dptr = discard_const(name);
2056 data.dsize = strlen(name)+1;
2058 /* CTDB has switched to using jenkins hash for volatile databases.
2059 * Even if tdb_flags do not explicitly mention TDB_INCOMPATIBLE_HASH,
2060 * always set it.
2062 if (!persistent) {
2063 tdb_flags |= TDB_INCOMPATIBLE_HASH;
2066 #ifdef TDB_MUTEX_LOCKING
2067 if (!persistent) {
2068 ret = ctdb_ctrl_get_tunable(ctdb, timeval_current_ofs(3,0),
2069 CTDB_CURRENT_NODE,
2070 "TDBMutexEnabled",
2071 &mutex_enabled);
2072 if (ret != 0) {
2073 DEBUG(DEBUG_WARNING, ("Assuming no mutex support.\n"));
2076 if (mutex_enabled == 1) {
2077 tdb_flags |= (TDB_MUTEX_LOCKING | TDB_CLEAR_IF_FIRST);
2080 #endif
2082 /* tell ctdb daemon to attach */
2083 ret = ctdb_control(ctdb, CTDB_CURRENT_NODE, tdb_flags,
2084 persistent?CTDB_CONTROL_DB_ATTACH_PERSISTENT:CTDB_CONTROL_DB_ATTACH,
2085 0, data, ctdb_db, &data, &res, NULL, NULL);
2086 if (ret != 0 || res != 0 || data.dsize != sizeof(uint32_t)) {
2087 DEBUG(DEBUG_ERR,("Failed to attach to database '%s'\n", name));
2088 talloc_free(ctdb_db);
2089 return NULL;
2092 ctdb_db->db_id = *(uint32_t *)data.dptr;
2093 talloc_free(data.dptr);
2095 ret = ctdb_ctrl_getdbpath(ctdb, timeout, CTDB_CURRENT_NODE, ctdb_db->db_id, ctdb_db, &ctdb_db->db_path);
2096 if (ret != 0) {
2097 DEBUG(DEBUG_ERR,("Failed to get dbpath for database '%s'\n", name));
2098 talloc_free(ctdb_db);
2099 return NULL;
2102 if (persistent) {
2103 tdb_flags = TDB_DEFAULT;
2104 } else {
2105 tdb_flags = TDB_NOSYNC;
2106 #ifdef TDB_MUTEX_LOCKING
2107 if (mutex_enabled) {
2108 tdb_flags |= (TDB_MUTEX_LOCKING | TDB_CLEAR_IF_FIRST);
2110 #endif
2112 if (ctdb->valgrinding) {
2113 tdb_flags |= TDB_NOMMAP;
2115 tdb_flags |= TDB_DISALLOW_NESTING;
2117 ctdb_db->ltdb = tdb_wrap_open(ctdb_db, ctdb_db->db_path, 0, tdb_flags,
2118 O_RDWR, 0);
2119 if (ctdb_db->ltdb == NULL) {
2120 ctdb_set_error(ctdb, "Failed to open tdb '%s'\n", ctdb_db->db_path);
2121 talloc_free(ctdb_db);
2122 return NULL;
2125 ctdb_db->persistent = persistent;
2127 DLIST_ADD(ctdb->db_list, ctdb_db);
2129 /* add well known functions */
2130 ctdb_set_call(ctdb_db, ctdb_null_func, CTDB_NULL_FUNC);
2131 ctdb_set_call(ctdb_db, ctdb_fetch_func, CTDB_FETCH_FUNC);
2132 ctdb_set_call(ctdb_db, ctdb_fetch_with_header_func, CTDB_FETCH_WITH_HEADER_FUNC);
2134 return ctdb_db;
2138 * detach from a specific database - client call
2140 int ctdb_detach(struct ctdb_context *ctdb, uint32_t db_id)
2142 int ret;
2143 int32_t status;
2144 TDB_DATA data;
2146 data.dsize = sizeof(db_id);
2147 data.dptr = (uint8_t *)&db_id;
2149 ret = ctdb_control(ctdb, CTDB_CURRENT_NODE, 0, CTDB_CONTROL_DB_DETACH,
2150 0, data, NULL, NULL, &status, NULL, NULL);
2151 if (ret != 0 || status != 0) {
2152 return -1;
2154 return 0;
2158 setup a call for a database
2160 int ctdb_set_call(struct ctdb_db_context *ctdb_db, ctdb_fn_t fn, uint32_t id)
2162 struct ctdb_registered_call *call;
2164 #if 0
2165 TDB_DATA data;
2166 int32_t status;
2167 struct ctdb_control_set_call c;
2168 int ret;
2170 /* this is no longer valid with the separate daemon architecture */
2171 c.db_id = ctdb_db->db_id;
2172 c.fn = fn;
2173 c.id = id;
2175 data.dptr = (uint8_t *)&c;
2176 data.dsize = sizeof(c);
2178 ret = ctdb_control(ctdb_db->ctdb, CTDB_CURRENT_NODE, 0, CTDB_CONTROL_SET_CALL, 0,
2179 data, NULL, NULL, &status, NULL, NULL);
2180 if (ret != 0 || status != 0) {
2181 DEBUG(DEBUG_ERR,("ctdb_set_call failed for call %u\n", id));
2182 return -1;
2184 #endif
2186 /* also register locally */
2187 call = talloc(ctdb_db, struct ctdb_registered_call);
2188 call->fn = fn;
2189 call->id = id;
2191 DLIST_ADD(ctdb_db->calls, call);
2192 return 0;
2196 struct traverse_state {
2197 bool done;
2198 uint32_t count;
2199 ctdb_traverse_func fn;
2200 void *private_data;
2201 bool listemptyrecords;
2205 called on each key during a ctdb_traverse
2207 static void traverse_handler(struct ctdb_context *ctdb, uint64_t srvid, TDB_DATA data, void *p)
2209 struct traverse_state *state = (struct traverse_state *)p;
2210 struct ctdb_rec_data *d = (struct ctdb_rec_data *)data.dptr;
2211 TDB_DATA key;
2213 if (data.dsize < sizeof(uint32_t) ||
2214 d->length != data.dsize) {
2215 DEBUG(DEBUG_ERR,("Bad data size %u in traverse_handler\n", (unsigned)data.dsize));
2216 state->done = true;
2217 return;
2220 key.dsize = d->keylen;
2221 key.dptr = &d->data[0];
2222 data.dsize = d->datalen;
2223 data.dptr = &d->data[d->keylen];
2225 if (key.dsize == 0 && data.dsize == 0) {
2226 /* end of traverse */
2227 state->done = true;
2228 return;
2231 if (!state->listemptyrecords &&
2232 data.dsize == sizeof(struct ctdb_ltdb_header))
2234 /* empty records are deleted records in ctdb */
2235 return;
2238 if (state->fn(ctdb, key, data, state->private_data) != 0) {
2239 state->done = true;
2242 state->count++;
2246 * start a cluster wide traverse, calling the supplied fn on each record
2247 * return the number of records traversed, or -1 on error
2249 * Extendet variant with a flag to signal whether empty records should
2250 * be listed.
2252 static int ctdb_traverse_ext(struct ctdb_db_context *ctdb_db,
2253 ctdb_traverse_func fn,
2254 bool withemptyrecords,
2255 void *private_data)
2257 TDB_DATA data;
2258 struct ctdb_traverse_start_ext t;
2259 int32_t status;
2260 int ret;
2261 uint64_t srvid = (getpid() | 0xFLL<<60);
2262 struct traverse_state state;
2264 state.done = false;
2265 state.count = 0;
2266 state.private_data = private_data;
2267 state.fn = fn;
2268 state.listemptyrecords = withemptyrecords;
2270 ret = ctdb_client_set_message_handler(ctdb_db->ctdb, srvid, traverse_handler, &state);
2271 if (ret != 0) {
2272 DEBUG(DEBUG_ERR,("Failed to setup traverse handler\n"));
2273 return -1;
2276 t.db_id = ctdb_db->db_id;
2277 t.srvid = srvid;
2278 t.reqid = 0;
2279 t.withemptyrecords = withemptyrecords;
2281 data.dptr = (uint8_t *)&t;
2282 data.dsize = sizeof(t);
2284 ret = ctdb_control(ctdb_db->ctdb, CTDB_CURRENT_NODE, 0, CTDB_CONTROL_TRAVERSE_START_EXT, 0,
2285 data, NULL, NULL, &status, NULL, NULL);
2286 if (ret != 0 || status != 0) {
2287 DEBUG(DEBUG_ERR,("ctdb_traverse_all failed\n"));
2288 ctdb_client_remove_message_handler(ctdb_db->ctdb, srvid, &state);
2289 return -1;
2292 while (!state.done) {
2293 event_loop_once(ctdb_db->ctdb->ev);
2296 ret = ctdb_client_remove_message_handler(ctdb_db->ctdb, srvid, &state);
2297 if (ret != 0) {
2298 DEBUG(DEBUG_ERR,("Failed to remove ctdb_traverse handler\n"));
2299 return -1;
2302 return state.count;
2306 * start a cluster wide traverse, calling the supplied fn on each record
2307 * return the number of records traversed, or -1 on error
2309 * Standard version which does not list the empty records:
2310 * These are considered deleted.
2312 int ctdb_traverse(struct ctdb_db_context *ctdb_db, ctdb_traverse_func fn, void *private_data)
2314 return ctdb_traverse_ext(ctdb_db, fn, false, private_data);
2317 #define ISASCII(x) (isprint(x) && !strchr("\"\\", (x)))
2319 called on each key during a catdb
2321 int ctdb_dumpdb_record(struct ctdb_context *ctdb, TDB_DATA key, TDB_DATA data, void *p)
2323 int i;
2324 struct ctdb_dump_db_context *c = (struct ctdb_dump_db_context *)p;
2325 FILE *f = c->f;
2326 struct ctdb_ltdb_header *h = (struct ctdb_ltdb_header *)data.dptr;
2328 fprintf(f, "key(%u) = \"", (unsigned)key.dsize);
2329 for (i=0;i<key.dsize;i++) {
2330 if (ISASCII(key.dptr[i])) {
2331 fprintf(f, "%c", key.dptr[i]);
2332 } else {
2333 fprintf(f, "\\%02X", key.dptr[i]);
2336 fprintf(f, "\"\n");
2338 fprintf(f, "dmaster: %u\n", h->dmaster);
2339 fprintf(f, "rsn: %llu\n", (unsigned long long)h->rsn);
2341 if (c->printlmaster && ctdb->vnn_map != NULL) {
2342 fprintf(f, "lmaster: %u\n", ctdb_lmaster(ctdb, &key));
2345 if (c->printhash) {
2346 fprintf(f, "hash: 0x%08x\n", ctdb_hash(&key));
2349 if (c->printrecordflags) {
2350 fprintf(f, "flags: 0x%08x", h->flags);
2351 if (h->flags & CTDB_REC_FLAG_MIGRATED_WITH_DATA) printf(" MIGRATED_WITH_DATA");
2352 if (h->flags & CTDB_REC_FLAG_VACUUM_MIGRATED) printf(" VACUUM_MIGRATED");
2353 if (h->flags & CTDB_REC_FLAG_AUTOMATIC) printf(" AUTOMATIC");
2354 if (h->flags & CTDB_REC_RO_HAVE_DELEGATIONS) printf(" RO_HAVE_DELEGATIONS");
2355 if (h->flags & CTDB_REC_RO_HAVE_READONLY) printf(" RO_HAVE_READONLY");
2356 if (h->flags & CTDB_REC_RO_REVOKING_READONLY) printf(" RO_REVOKING_READONLY");
2357 if (h->flags & CTDB_REC_RO_REVOKE_COMPLETE) printf(" RO_REVOKE_COMPLETE");
2358 fprintf(f, "\n");
2361 if (c->printdatasize) {
2362 fprintf(f, "data size: %u\n", (unsigned)data.dsize);
2363 } else {
2364 fprintf(f, "data(%u) = \"", (unsigned)(data.dsize - sizeof(*h)));
2365 for (i=sizeof(*h);i<data.dsize;i++) {
2366 if (ISASCII(data.dptr[i])) {
2367 fprintf(f, "%c", data.dptr[i]);
2368 } else {
2369 fprintf(f, "\\%02X", data.dptr[i]);
2372 fprintf(f, "\"\n");
2375 fprintf(f, "\n");
2377 return 0;
2381 convenience function to list all keys to stdout
2383 int ctdb_dump_db(struct ctdb_db_context *ctdb_db,
2384 struct ctdb_dump_db_context *ctx)
2386 return ctdb_traverse_ext(ctdb_db, ctdb_dumpdb_record,
2387 ctx->printemptyrecords, ctx);
2391 get the pid of a ctdb daemon
2393 int ctdb_ctrl_getpid(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t *pid)
2395 int ret;
2396 int32_t res;
2398 ret = ctdb_control(ctdb, destnode, 0,
2399 CTDB_CONTROL_GET_PID, 0, tdb_null,
2400 NULL, NULL, &res, &timeout, NULL);
2401 if (ret != 0) {
2402 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getpid failed\n"));
2403 return -1;
2406 *pid = res;
2408 return 0;
2413 async freeze send control
2415 struct ctdb_client_control_state *
2416 ctdb_ctrl_freeze_send(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct timeval timeout, uint32_t destnode, uint32_t priority)
2418 return ctdb_control_send(ctdb, destnode, priority,
2419 CTDB_CONTROL_FREEZE, 0, tdb_null,
2420 mem_ctx, &timeout, NULL);
2424 async freeze recv control
2426 int ctdb_ctrl_freeze_recv(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct ctdb_client_control_state *state)
2428 int ret;
2429 int32_t res;
2431 ret = ctdb_control_recv(ctdb, state, mem_ctx, NULL, &res, NULL);
2432 if ( (ret != 0) || (res != 0) ){
2433 DEBUG(DEBUG_ERR,(__location__ " ctdb_ctrl_freeze_recv failed\n"));
2434 return -1;
2437 return 0;
2441 freeze databases of a certain priority
2443 int ctdb_ctrl_freeze_priority(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t priority)
2445 TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
2446 struct ctdb_client_control_state *state;
2447 int ret;
2449 state = ctdb_ctrl_freeze_send(ctdb, tmp_ctx, timeout, destnode, priority);
2450 ret = ctdb_ctrl_freeze_recv(ctdb, tmp_ctx, state);
2451 talloc_free(tmp_ctx);
2453 return ret;
2456 /* Freeze all databases */
2457 int ctdb_ctrl_freeze(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode)
2459 int i;
2461 for (i=1; i<=NUM_DB_PRIORITIES; i++) {
2462 if (ctdb_ctrl_freeze_priority(ctdb, timeout, destnode, i) != 0) {
2463 return -1;
2466 return 0;
2470 thaw databases of a certain priority
2472 int ctdb_ctrl_thaw_priority(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t priority)
2474 int ret;
2475 int32_t res;
2477 ret = ctdb_control(ctdb, destnode, priority,
2478 CTDB_CONTROL_THAW, 0, tdb_null,
2479 NULL, NULL, &res, &timeout, NULL);
2480 if (ret != 0 || res != 0) {
2481 DEBUG(DEBUG_ERR,(__location__ " ctdb_control thaw failed\n"));
2482 return -1;
2485 return 0;
/*
  Thaw all databases on destnode by issuing a thaw with priority 0.
 */
int ctdb_ctrl_thaw(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode)
{
	const uint32_t all_priorities = 0;

	return ctdb_ctrl_thaw_priority(ctdb, timeout, destnode, all_priorities);
}
2495 get pnn of a node, or -1
2497 int ctdb_ctrl_getpnn(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode)
2499 int ret;
2500 int32_t res;
2502 ret = ctdb_control(ctdb, destnode, 0,
2503 CTDB_CONTROL_GET_PNN, 0, tdb_null,
2504 NULL, NULL, &res, &timeout, NULL);
2505 if (ret != 0) {
2506 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getpnn failed\n"));
2507 return -1;
2510 return res;
2514 get the monitoring mode of a remote node
2516 int ctdb_ctrl_getmonmode(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t *monmode)
2518 int ret;
2519 int32_t res;
2521 ret = ctdb_control(ctdb, destnode, 0,
2522 CTDB_CONTROL_GET_MONMODE, 0, tdb_null,
2523 NULL, NULL, &res, &timeout, NULL);
2524 if (ret != 0) {
2525 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getmonmode failed\n"));
2526 return -1;
2529 *monmode = res;
2531 return 0;
2536 set the monitoring mode of a remote node to active
2538 int ctdb_ctrl_enable_monmode(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode)
2540 int ret;
2543 ret = ctdb_control(ctdb, destnode, 0,
2544 CTDB_CONTROL_ENABLE_MONITOR, 0, tdb_null,
2545 NULL, NULL,NULL, &timeout, NULL);
2546 if (ret != 0) {
2547 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for enable_monitor failed\n"));
2548 return -1;
2553 return 0;
2557 set the monitoring mode of a remote node to disable
2559 int ctdb_ctrl_disable_monmode(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode)
2561 int ret;
2564 ret = ctdb_control(ctdb, destnode, 0,
2565 CTDB_CONTROL_DISABLE_MONITOR, 0, tdb_null,
2566 NULL, NULL, NULL, &timeout, NULL);
2567 if (ret != 0) {
2568 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for disable_monitor failed\n"));
2569 return -1;
2574 return 0;
2580 sent to a node to make it take over an ip address
2582 int ctdb_ctrl_takeover_ip(struct ctdb_context *ctdb, struct timeval timeout,
2583 uint32_t destnode, struct ctdb_public_ip *ip)
2585 TDB_DATA data;
2586 int ret;
2587 int32_t res;
2589 data.dsize = sizeof(*ip);
2590 data.dptr = (uint8_t *)ip;
2592 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_TAKEOVER_IP, 0,
2593 data, NULL, NULL, &res, &timeout, NULL);
2594 if (ret != 0 || res != 0) {
2595 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for takeover_ip failed\n"));
2596 return -1;
2599 return 0;
2604 sent to a node to make it release an ip address
2606 int ctdb_ctrl_release_ip(struct ctdb_context *ctdb, struct timeval timeout,
2607 uint32_t destnode, struct ctdb_public_ip *ip)
2609 TDB_DATA data;
2610 int ret;
2611 int32_t res;
2613 data.dsize = sizeof(*ip);
2614 data.dptr = (uint8_t *)ip;
2616 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_RELEASE_IP, 0,
2617 data, NULL, NULL, &res, &timeout, NULL);
2618 if (ret != 0 || res != 0) {
2619 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for release_ip failed\n"));
2620 return -1;
2623 return 0;
2628 get a tunable
2630 int ctdb_ctrl_get_tunable(struct ctdb_context *ctdb,
2631 struct timeval timeout,
2632 uint32_t destnode,
2633 const char *name, uint32_t *value)
2635 struct ctdb_control_get_tunable *t;
2636 TDB_DATA data, outdata;
2637 int32_t res;
2638 int ret;
2640 data.dsize = offsetof(struct ctdb_control_get_tunable, name) + strlen(name) + 1;
2641 data.dptr = talloc_size(ctdb, data.dsize);
2642 CTDB_NO_MEMORY(ctdb, data.dptr);
2644 t = (struct ctdb_control_get_tunable *)data.dptr;
2645 t->length = strlen(name)+1;
2646 memcpy(t->name, name, t->length);
2648 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_GET_TUNABLE, 0, data, ctdb,
2649 &outdata, &res, &timeout, NULL);
2650 talloc_free(data.dptr);
2651 if (ret != 0 || res != 0) {
2652 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get_tunable failed\n"));
2653 return ret != 0 ? ret : res;
2656 if (outdata.dsize != sizeof(uint32_t)) {
2657 DEBUG(DEBUG_ERR,("Invalid return data in get_tunable\n"));
2658 talloc_free(outdata.dptr);
2659 return -1;
2662 *value = *(uint32_t *)outdata.dptr;
2663 talloc_free(outdata.dptr);
2665 return 0;
2669 set a tunable
2671 int ctdb_ctrl_set_tunable(struct ctdb_context *ctdb,
2672 struct timeval timeout,
2673 uint32_t destnode,
2674 const char *name, uint32_t value)
2676 struct ctdb_control_set_tunable *t;
2677 TDB_DATA data;
2678 int32_t res;
2679 int ret;
2681 data.dsize = offsetof(struct ctdb_control_set_tunable, name) + strlen(name) + 1;
2682 data.dptr = talloc_size(ctdb, data.dsize);
2683 CTDB_NO_MEMORY(ctdb, data.dptr);
2685 t = (struct ctdb_control_set_tunable *)data.dptr;
2686 t->length = strlen(name)+1;
2687 memcpy(t->name, name, t->length);
2688 t->value = value;
2690 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_SET_TUNABLE, 0, data, NULL,
2691 NULL, &res, &timeout, NULL);
2692 talloc_free(data.dptr);
2693 if ((ret != 0) || (res == -1)) {
2694 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set_tunable failed\n"));
2695 return -1;
2698 return res;
2702 list tunables
2704 int ctdb_ctrl_list_tunables(struct ctdb_context *ctdb,
2705 struct timeval timeout,
2706 uint32_t destnode,
2707 TALLOC_CTX *mem_ctx,
2708 const char ***list, uint32_t *count)
2710 TDB_DATA outdata;
2711 int32_t res;
2712 int ret;
2713 struct ctdb_control_list_tunable *t;
2714 char *p, *s, *ptr;
2716 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_LIST_TUNABLES, 0, tdb_null,
2717 mem_ctx, &outdata, &res, &timeout, NULL);
2718 if (ret != 0 || res != 0) {
2719 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for list_tunables failed\n"));
2720 return -1;
2723 t = (struct ctdb_control_list_tunable *)outdata.dptr;
2724 if (outdata.dsize < offsetof(struct ctdb_control_list_tunable, data) ||
2725 t->length > outdata.dsize-offsetof(struct ctdb_control_list_tunable, data)) {
2726 DEBUG(DEBUG_ERR,("Invalid data in list_tunables reply\n"));
2727 talloc_free(outdata.dptr);
2728 return -1;
2731 p = talloc_strndup(mem_ctx, (char *)t->data, t->length);
2732 CTDB_NO_MEMORY(ctdb, p);
2734 talloc_free(outdata.dptr);
2736 (*list) = NULL;
2737 (*count) = 0;
2739 for (s=strtok_r(p, ":", &ptr); s; s=strtok_r(NULL, ":", &ptr)) {
2740 (*list) = talloc_realloc(mem_ctx, *list, const char *, 1+(*count));
2741 CTDB_NO_MEMORY(ctdb, *list);
2742 (*list)[*count] = talloc_strdup(*list, s);
2743 CTDB_NO_MEMORY(ctdb, (*list)[*count]);
2744 (*count)++;
2747 talloc_free(p);
2749 return 0;
2753 int ctdb_ctrl_get_public_ips_flags(struct ctdb_context *ctdb,
2754 struct timeval timeout, uint32_t destnode,
2755 TALLOC_CTX *mem_ctx,
2756 uint32_t flags,
2757 struct ctdb_all_public_ips **ips)
2759 int ret;
2760 TDB_DATA outdata;
2761 int32_t res;
2763 ret = ctdb_control(ctdb, destnode, 0,
2764 CTDB_CONTROL_GET_PUBLIC_IPS, flags, tdb_null,
2765 mem_ctx, &outdata, &res, &timeout, NULL);
2766 if (ret != 0 || res != 0) {
2767 DEBUG(DEBUG_ERR,(__location__
2768 " ctdb_control for getpublicips failed ret:%d res:%d\n",
2769 ret, res));
2770 return -1;
2773 *ips = (struct ctdb_all_public_ips *)talloc_memdup(mem_ctx, outdata.dptr, outdata.dsize);
2774 talloc_free(outdata.dptr);
2776 return 0;
2779 int ctdb_ctrl_get_public_ips(struct ctdb_context *ctdb,
2780 struct timeval timeout, uint32_t destnode,
2781 TALLOC_CTX *mem_ctx,
2782 struct ctdb_all_public_ips **ips)
2784 return ctdb_ctrl_get_public_ips_flags(ctdb, timeout,
2785 destnode, mem_ctx,
2786 0, ips);
2789 int ctdb_ctrl_get_public_ip_info(struct ctdb_context *ctdb,
2790 struct timeval timeout, uint32_t destnode,
2791 TALLOC_CTX *mem_ctx,
2792 const ctdb_sock_addr *addr,
2793 struct ctdb_control_public_ip_info **_info)
2795 int ret;
2796 TDB_DATA indata;
2797 TDB_DATA outdata;
2798 int32_t res;
2799 struct ctdb_control_public_ip_info *info;
2800 uint32_t len;
2801 uint32_t i;
2803 indata.dptr = discard_const_p(uint8_t, addr);
2804 indata.dsize = sizeof(*addr);
2806 ret = ctdb_control(ctdb, destnode, 0,
2807 CTDB_CONTROL_GET_PUBLIC_IP_INFO, 0, indata,
2808 mem_ctx, &outdata, &res, &timeout, NULL);
2809 if (ret != 0 || res != 0) {
2810 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get public ip info "
2811 "failed ret:%d res:%d\n",
2812 ret, res));
2813 return -1;
2816 len = offsetof(struct ctdb_control_public_ip_info, ifaces);
2817 if (len > outdata.dsize) {
2818 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get public ip info "
2819 "returned invalid data with size %u > %u\n",
2820 (unsigned int)outdata.dsize,
2821 (unsigned int)len));
2822 dump_data(DEBUG_DEBUG, outdata.dptr, outdata.dsize);
2823 return -1;
2826 info = (struct ctdb_control_public_ip_info *)outdata.dptr;
2827 len += info->num*sizeof(struct ctdb_control_iface_info);
2829 if (len > outdata.dsize) {
2830 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get public ip info "
2831 "returned invalid data with size %u > %u\n",
2832 (unsigned int)outdata.dsize,
2833 (unsigned int)len));
2834 dump_data(DEBUG_DEBUG, outdata.dptr, outdata.dsize);
2835 return -1;
2838 /* make sure we null terminate the returned strings */
2839 for (i=0; i < info->num; i++) {
2840 info->ifaces[i].name[CTDB_IFACE_SIZE] = '\0';
2843 *_info = (struct ctdb_control_public_ip_info *)talloc_memdup(mem_ctx,
2844 outdata.dptr,
2845 outdata.dsize);
2846 talloc_free(outdata.dptr);
2847 if (*_info == NULL) {
2848 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get public ip info "
2849 "talloc_memdup size %u failed\n",
2850 (unsigned int)outdata.dsize));
2851 return -1;
2854 return 0;
2857 int ctdb_ctrl_get_ifaces(struct ctdb_context *ctdb,
2858 struct timeval timeout, uint32_t destnode,
2859 TALLOC_CTX *mem_ctx,
2860 struct ctdb_control_get_ifaces **_ifaces)
2862 int ret;
2863 TDB_DATA outdata;
2864 int32_t res;
2865 struct ctdb_control_get_ifaces *ifaces;
2866 uint32_t len;
2867 uint32_t i;
2869 ret = ctdb_control(ctdb, destnode, 0,
2870 CTDB_CONTROL_GET_IFACES, 0, tdb_null,
2871 mem_ctx, &outdata, &res, &timeout, NULL);
2872 if (ret != 0 || res != 0) {
2873 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get ifaces "
2874 "failed ret:%d res:%d\n",
2875 ret, res));
2876 return -1;
2879 len = offsetof(struct ctdb_control_get_ifaces, ifaces);
2880 if (len > outdata.dsize) {
2881 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get ifaces "
2882 "returned invalid data with size %u > %u\n",
2883 (unsigned int)outdata.dsize,
2884 (unsigned int)len));
2885 dump_data(DEBUG_DEBUG, outdata.dptr, outdata.dsize);
2886 return -1;
2889 ifaces = (struct ctdb_control_get_ifaces *)outdata.dptr;
2890 len += ifaces->num*sizeof(struct ctdb_control_iface_info);
2892 if (len > outdata.dsize) {
2893 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get ifaces "
2894 "returned invalid data with size %u > %u\n",
2895 (unsigned int)outdata.dsize,
2896 (unsigned int)len));
2897 dump_data(DEBUG_DEBUG, outdata.dptr, outdata.dsize);
2898 return -1;
2901 /* make sure we null terminate the returned strings */
2902 for (i=0; i < ifaces->num; i++) {
2903 ifaces->ifaces[i].name[CTDB_IFACE_SIZE] = '\0';
2906 *_ifaces = (struct ctdb_control_get_ifaces *)talloc_memdup(mem_ctx,
2907 outdata.dptr,
2908 outdata.dsize);
2909 talloc_free(outdata.dptr);
2910 if (*_ifaces == NULL) {
2911 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get ifaces "
2912 "talloc_memdup size %u failed\n",
2913 (unsigned int)outdata.dsize));
2914 return -1;
2917 return 0;
2920 int ctdb_ctrl_set_iface_link(struct ctdb_context *ctdb,
2921 struct timeval timeout, uint32_t destnode,
2922 TALLOC_CTX *mem_ctx,
2923 const struct ctdb_control_iface_info *info)
2925 int ret;
2926 TDB_DATA indata;
2927 int32_t res;
2929 indata.dptr = discard_const_p(uint8_t, info);
2930 indata.dsize = sizeof(*info);
2932 ret = ctdb_control(ctdb, destnode, 0,
2933 CTDB_CONTROL_SET_IFACE_LINK_STATE, 0, indata,
2934 mem_ctx, NULL, &res, &timeout, NULL);
2935 if (ret != 0 || res != 0) {
2936 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set iface link "
2937 "failed ret:%d res:%d\n",
2938 ret, res));
2939 return -1;
2942 return 0;
2946 set/clear the permanent disabled bit on a remote node
2948 int ctdb_ctrl_modflags(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode,
2949 uint32_t set, uint32_t clear)
2951 int ret;
2952 TDB_DATA data;
2953 struct ctdb_node_map *nodemap=NULL;
2954 struct ctdb_node_flag_change c;
2955 TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
2956 uint32_t recmaster;
2957 uint32_t *nodes;
2960 /* find the recovery master */
2961 ret = ctdb_ctrl_getrecmaster(ctdb, tmp_ctx, timeout, CTDB_CURRENT_NODE, &recmaster);
2962 if (ret != 0) {
2963 DEBUG(DEBUG_ERR, (__location__ " Unable to get recmaster from local node\n"));
2964 talloc_free(tmp_ctx);
2965 return ret;
2969 /* read the node flags from the recmaster */
2970 ret = ctdb_ctrl_getnodemap(ctdb, timeout, recmaster, tmp_ctx, &nodemap);
2971 if (ret != 0) {
2972 DEBUG(DEBUG_ERR, (__location__ " Unable to get nodemap from node %u\n", destnode));
2973 talloc_free(tmp_ctx);
2974 return -1;
2976 if (destnode >= nodemap->num) {
2977 DEBUG(DEBUG_ERR,(__location__ " Nodemap from recmaster does not contain node %d\n", destnode));
2978 talloc_free(tmp_ctx);
2979 return -1;
2982 c.pnn = destnode;
2983 c.old_flags = nodemap->nodes[destnode].flags;
2984 c.new_flags = c.old_flags;
2985 c.new_flags |= set;
2986 c.new_flags &= ~clear;
2988 data.dsize = sizeof(c);
2989 data.dptr = (unsigned char *)&c;
2991 /* send the flags update to all connected nodes */
2992 nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
2994 if (ctdb_client_async_control(ctdb, CTDB_CONTROL_MODIFY_FLAGS,
2995 nodes, 0,
2996 timeout, false, data,
2997 NULL, NULL,
2998 NULL) != 0) {
2999 DEBUG(DEBUG_ERR, (__location__ " Unable to update nodeflags on remote nodes\n"));
3001 talloc_free(tmp_ctx);
3002 return -1;
3005 talloc_free(tmp_ctx);
3006 return 0;
3011 get all tunables
3013 int ctdb_ctrl_get_all_tunables(struct ctdb_context *ctdb,
3014 struct timeval timeout,
3015 uint32_t destnode,
3016 struct ctdb_tunable *tunables)
3018 TDB_DATA outdata;
3019 int ret;
3020 int32_t res;
3022 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_GET_ALL_TUNABLES, 0, tdb_null, ctdb,
3023 &outdata, &res, &timeout, NULL);
3024 if (ret != 0 || res != 0) {
3025 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get all tunables failed\n"));
3026 return -1;
3029 if (outdata.dsize != sizeof(*tunables)) {
3030 DEBUG(DEBUG_ERR,(__location__ " bad data size %u in ctdb_ctrl_get_all_tunables should be %u\n",
3031 (unsigned)outdata.dsize, (unsigned)sizeof(*tunables)));
3032 return -1;
3035 *tunables = *(struct ctdb_tunable *)outdata.dptr;
3036 talloc_free(outdata.dptr);
3037 return 0;
3041 add a public address to a node
3043 int ctdb_ctrl_add_public_ip(struct ctdb_context *ctdb,
3044 struct timeval timeout,
3045 uint32_t destnode,
3046 struct ctdb_control_ip_iface *pub)
3048 TDB_DATA data;
3049 int32_t res;
3050 int ret;
3052 data.dsize = offsetof(struct ctdb_control_ip_iface, iface) + pub->len;
3053 data.dptr = (unsigned char *)pub;
3055 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_ADD_PUBLIC_IP, 0, data, NULL,
3056 NULL, &res, &timeout, NULL);
3057 if (ret != 0 || res != 0) {
3058 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for add_public_ip failed\n"));
3059 return -1;
3062 return 0;
3066 delete a public address from a node
3068 int ctdb_ctrl_del_public_ip(struct ctdb_context *ctdb,
3069 struct timeval timeout,
3070 uint32_t destnode,
3071 struct ctdb_control_ip_iface *pub)
3073 TDB_DATA data;
3074 int32_t res;
3075 int ret;
3077 data.dsize = offsetof(struct ctdb_control_ip_iface, iface) + pub->len;
3078 data.dptr = (unsigned char *)pub;
3080 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_DEL_PUBLIC_IP, 0, data, NULL,
3081 NULL, &res, &timeout, NULL);
3082 if (ret != 0 || res != 0) {
3083 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for del_public_ip failed\n"));
3084 return -1;
3087 return 0;
3091 kill a tcp connection
3093 int ctdb_ctrl_killtcp(struct ctdb_context *ctdb,
3094 struct timeval timeout,
3095 uint32_t destnode,
3096 struct ctdb_control_killtcp *killtcp)
3098 TDB_DATA data;
3099 int32_t res;
3100 int ret;
3102 data.dsize = sizeof(struct ctdb_control_killtcp);
3103 data.dptr = (unsigned char *)killtcp;
3105 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_KILL_TCP, 0, data, NULL,
3106 NULL, &res, &timeout, NULL);
3107 if (ret != 0 || res != 0) {
3108 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for killtcp failed\n"));
3109 return -1;
3112 return 0;
3116 send a gratious arp
3118 int ctdb_ctrl_gratious_arp(struct ctdb_context *ctdb,
3119 struct timeval timeout,
3120 uint32_t destnode,
3121 ctdb_sock_addr *addr,
3122 const char *ifname)
3124 TDB_DATA data;
3125 int32_t res;
3126 int ret, len;
3127 struct ctdb_control_gratious_arp *gratious_arp;
3128 TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
3131 len = strlen(ifname)+1;
3132 gratious_arp = talloc_size(tmp_ctx,
3133 offsetof(struct ctdb_control_gratious_arp, iface) + len);
3134 CTDB_NO_MEMORY(ctdb, gratious_arp);
3136 gratious_arp->addr = *addr;
3137 gratious_arp->len = len;
3138 memcpy(&gratious_arp->iface[0], ifname, len);
3141 data.dsize = offsetof(struct ctdb_control_gratious_arp, iface) + len;
3142 data.dptr = (unsigned char *)gratious_arp;
3144 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_SEND_GRATIOUS_ARP, 0, data, NULL,
3145 NULL, &res, &timeout, NULL);
3146 if (ret != 0 || res != 0) {
3147 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for gratious_arp failed\n"));
3148 talloc_free(tmp_ctx);
3149 return -1;
3152 talloc_free(tmp_ctx);
3153 return 0;
3157 get a list of all tcp tickles that a node knows about for a particular vnn
3159 int ctdb_ctrl_get_tcp_tickles(struct ctdb_context *ctdb,
3160 struct timeval timeout, uint32_t destnode,
3161 TALLOC_CTX *mem_ctx,
3162 ctdb_sock_addr *addr,
3163 struct ctdb_control_tcp_tickle_list **list)
3165 int ret;
3166 TDB_DATA data, outdata;
3167 int32_t status;
3169 data.dptr = (uint8_t*)addr;
3170 data.dsize = sizeof(ctdb_sock_addr);
3172 ret = ctdb_control(ctdb, destnode, 0,
3173 CTDB_CONTROL_GET_TCP_TICKLE_LIST, 0, data,
3174 mem_ctx, &outdata, &status, NULL, NULL);
3175 if (ret != 0 || status != 0) {
3176 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get tcp tickles failed\n"));
3177 return -1;
3180 *list = (struct ctdb_control_tcp_tickle_list *)outdata.dptr;
3182 return status;
3186 register a server id
3188 int ctdb_ctrl_register_server_id(struct ctdb_context *ctdb,
3189 struct timeval timeout,
3190 struct ctdb_server_id *id)
3192 TDB_DATA data;
3193 int32_t res;
3194 int ret;
3196 data.dsize = sizeof(struct ctdb_server_id);
3197 data.dptr = (unsigned char *)id;
3199 ret = ctdb_control(ctdb, CTDB_CURRENT_NODE, 0,
3200 CTDB_CONTROL_REGISTER_SERVER_ID,
3201 0, data, NULL,
3202 NULL, &res, &timeout, NULL);
3203 if (ret != 0 || res != 0) {
3204 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for register server id failed\n"));
3205 return -1;
3208 return 0;
3212 unregister a server id
3214 int ctdb_ctrl_unregister_server_id(struct ctdb_context *ctdb,
3215 struct timeval timeout,
3216 struct ctdb_server_id *id)
3218 TDB_DATA data;
3219 int32_t res;
3220 int ret;
3222 data.dsize = sizeof(struct ctdb_server_id);
3223 data.dptr = (unsigned char *)id;
3225 ret = ctdb_control(ctdb, CTDB_CURRENT_NODE, 0,
3226 CTDB_CONTROL_UNREGISTER_SERVER_ID,
3227 0, data, NULL,
3228 NULL, &res, &timeout, NULL);
3229 if (ret != 0 || res != 0) {
3230 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for unregister server id failed\n"));
3231 return -1;
3234 return 0;
3239 check if a server id exists
3241 if a server id does exist, return *status == 1, otherwise *status == 0
3243 int ctdb_ctrl_check_server_id(struct ctdb_context *ctdb,
3244 struct timeval timeout,
3245 uint32_t destnode,
3246 struct ctdb_server_id *id,
3247 uint32_t *status)
3249 TDB_DATA data;
3250 int32_t res;
3251 int ret;
3253 data.dsize = sizeof(struct ctdb_server_id);
3254 data.dptr = (unsigned char *)id;
3256 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_CHECK_SERVER_ID,
3257 0, data, NULL,
3258 NULL, &res, &timeout, NULL);
3259 if (ret != 0) {
3260 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for check server id failed\n"));
3261 return -1;
3264 if (res) {
3265 *status = 1;
3266 } else {
3267 *status = 0;
3270 return 0;
3274 get the list of server ids that are registered on a node
3276 int ctdb_ctrl_get_server_id_list(struct ctdb_context *ctdb,
3277 TALLOC_CTX *mem_ctx,
3278 struct timeval timeout, uint32_t destnode,
3279 struct ctdb_server_id_list **svid_list)
3281 int ret;
3282 TDB_DATA outdata;
3283 int32_t res;
3285 ret = ctdb_control(ctdb, destnode, 0,
3286 CTDB_CONTROL_GET_SERVER_ID_LIST, 0, tdb_null,
3287 mem_ctx, &outdata, &res, &timeout, NULL);
3288 if (ret != 0 || res != 0) {
3289 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get_server_id_list failed\n"));
3290 return -1;
3293 *svid_list = (struct ctdb_server_id_list *)talloc_steal(mem_ctx, outdata.dptr);
3295 return 0;
3299 initialise the ctdb daemon for client applications
3301 NOTE: In current code the daemon does not fork. This is for testing purposes only
3302 and to simplify the code.
3304 struct ctdb_context *ctdb_init(struct event_context *ev)
3306 int ret;
3307 struct ctdb_context *ctdb;
3309 ctdb = talloc_zero(ev, struct ctdb_context);
3310 if (ctdb == NULL) {
3311 DEBUG(DEBUG_ERR,(__location__ " talloc_zero failed.\n"));
3312 return NULL;
3314 ctdb->ev = ev;
3315 ctdb->idr = idr_init(ctdb);
3316 /* Wrap early to exercise code. */
3317 ctdb->lastid = INT_MAX-200;
3318 CTDB_NO_MEMORY_NULL(ctdb, ctdb->idr);
3320 ret = ctdb_set_socketname(ctdb, CTDB_SOCKET);
3321 if (ret != 0) {
3322 DEBUG(DEBUG_ERR,(__location__ " ctdb_set_socketname failed.\n"));
3323 talloc_free(ctdb);
3324 return NULL;
3327 ctdb->statistics.statistics_start_time = timeval_current();
3329 return ctdb;
3334 set some ctdb flags
3336 void ctdb_set_flags(struct ctdb_context *ctdb, unsigned flags)
3338 ctdb->flags |= flags;
3342 setup the local socket name
3344 int ctdb_set_socketname(struct ctdb_context *ctdb, const char *socketname)
3346 ctdb->daemon.name = talloc_strdup(ctdb, socketname);
3347 CTDB_NO_MEMORY(ctdb, ctdb->daemon.name);
3349 return 0;
3352 const char *ctdb_get_socketname(struct ctdb_context *ctdb)
3354 return ctdb->daemon.name;
3358 return the pnn of this node
3360 uint32_t ctdb_get_pnn(struct ctdb_context *ctdb)
3362 return ctdb->pnn;
3367 get the uptime of a remote node
3369 struct ctdb_client_control_state *
3370 ctdb_ctrl_uptime_send(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct timeval timeout, uint32_t destnode)
3372 return ctdb_control_send(ctdb, destnode, 0,
3373 CTDB_CONTROL_UPTIME, 0, tdb_null,
3374 mem_ctx, &timeout, NULL);
3377 int ctdb_ctrl_uptime_recv(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct ctdb_client_control_state *state, struct ctdb_uptime **uptime)
3379 int ret;
3380 int32_t res;
3381 TDB_DATA outdata;
3383 ret = ctdb_control_recv(ctdb, state, mem_ctx, &outdata, &res, NULL);
3384 if (ret != 0 || res != 0) {
3385 DEBUG(DEBUG_ERR,(__location__ " ctdb_ctrl_uptime_recv failed\n"));
3386 return -1;
3389 *uptime = (struct ctdb_uptime *)talloc_steal(mem_ctx, outdata.dptr);
3391 return 0;
3394 int ctdb_ctrl_uptime(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct timeval timeout, uint32_t destnode, struct ctdb_uptime **uptime)
3396 struct ctdb_client_control_state *state;
3398 state = ctdb_ctrl_uptime_send(ctdb, mem_ctx, timeout, destnode);
3399 return ctdb_ctrl_uptime_recv(ctdb, mem_ctx, state, uptime);
3403 send a control to execute the "recovered" event script on a node
3405 int ctdb_ctrl_end_recovery(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode)
3407 int ret;
3408 int32_t status;
3410 ret = ctdb_control(ctdb, destnode, 0,
3411 CTDB_CONTROL_END_RECOVERY, 0, tdb_null,
3412 NULL, NULL, &status, &timeout, NULL);
3413 if (ret != 0 || status != 0) {
3414 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for end_recovery failed\n"));
3415 return -1;
3418 return 0;
3422 callback for the async helpers used when sending the same control
3423 to multiple nodes in parallell.
3425 static void async_callback(struct ctdb_client_control_state *state)
3427 struct client_async_data *data = talloc_get_type(state->async.private_data, struct client_async_data);
3428 struct ctdb_context *ctdb = talloc_get_type(state->ctdb, struct ctdb_context);
3429 int ret;
3430 TDB_DATA outdata;
3431 int32_t res = -1;
3432 uint32_t destnode = state->c->hdr.destnode;
3434 outdata.dsize = 0;
3435 outdata.dptr = NULL;
3437 /* one more node has responded with recmode data */
3438 data->count--;
3440 /* if we failed to push the db, then return an error and let
3441 the main loop try again.
3443 if (state->state != CTDB_CONTROL_DONE) {
3444 if ( !data->dont_log_errors) {
3445 DEBUG(DEBUG_ERR,("Async operation failed with state %d, opcode:%u\n", state->state, data->opcode));
3447 data->fail_count++;
3448 if (state->state == CTDB_CONTROL_TIMEOUT) {
3449 res = -ETIME;
3450 } else {
3451 res = -1;
3453 if (data->fail_callback) {
3454 data->fail_callback(ctdb, destnode, res, outdata,
3455 data->callback_data);
3457 return;
3460 state->async.fn = NULL;
3462 ret = ctdb_control_recv(ctdb, state, data, &outdata, &res, NULL);
3463 if ((ret != 0) || (res != 0)) {
3464 if ( !data->dont_log_errors) {
3465 DEBUG(DEBUG_ERR,("Async operation failed with ret=%d res=%d opcode=%u\n", ret, (int)res, data->opcode));
3467 data->fail_count++;
3468 if (data->fail_callback) {
3469 data->fail_callback(ctdb, destnode, res, outdata,
3470 data->callback_data);
3473 if ((ret == 0) && (data->callback != NULL)) {
3474 data->callback(ctdb, destnode, res, outdata,
3475 data->callback_data);
3480 void ctdb_client_async_add(struct client_async_data *data, struct ctdb_client_control_state *state)
3482 /* set up the callback functions */
3483 state->async.fn = async_callback;
3484 state->async.private_data = data;
3486 /* one more control to wait for to complete */
3487 data->count++;
3491 /* wait for up to the maximum number of seconds allowed
3492 or until all nodes we expect a response from has replied
3494 int ctdb_client_async_wait(struct ctdb_context *ctdb, struct client_async_data *data)
3496 while (data->count > 0) {
3497 event_loop_once(ctdb->ev);
3499 if (data->fail_count != 0) {
3500 if (!data->dont_log_errors) {
3501 DEBUG(DEBUG_ERR,("Async wait failed - fail_count=%u\n",
3502 data->fail_count));
3504 return -1;
3506 return 0;
3511 perform a simple control on the listed nodes
3512 The control cannot return data
3514 int ctdb_client_async_control(struct ctdb_context *ctdb,
3515 enum ctdb_controls opcode,
3516 uint32_t *nodes,
3517 uint64_t srvid,
3518 struct timeval timeout,
3519 bool dont_log_errors,
3520 TDB_DATA data,
3521 client_async_callback client_callback,
3522 client_async_callback fail_callback,
3523 void *callback_data)
3525 struct client_async_data *async_data;
3526 struct ctdb_client_control_state *state;
3527 int j, num_nodes;
3529 async_data = talloc_zero(ctdb, struct client_async_data);
3530 CTDB_NO_MEMORY_FATAL(ctdb, async_data);
3531 async_data->dont_log_errors = dont_log_errors;
3532 async_data->callback = client_callback;
3533 async_data->fail_callback = fail_callback;
3534 async_data->callback_data = callback_data;
3535 async_data->opcode = opcode;
3537 num_nodes = talloc_get_size(nodes) / sizeof(uint32_t);
3539 /* loop over all nodes and send an async control to each of them */
3540 for (j=0; j<num_nodes; j++) {
3541 uint32_t pnn = nodes[j];
3543 state = ctdb_control_send(ctdb, pnn, srvid, opcode,
3544 0, data, async_data, &timeout, NULL);
3545 if (state == NULL) {
3546 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control %u\n", (unsigned)opcode));
3547 talloc_free(async_data);
3548 return -1;
3551 ctdb_client_async_add(async_data, state);
3554 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
3555 talloc_free(async_data);
3556 return -1;
3559 talloc_free(async_data);
3560 return 0;
3563 uint32_t *list_of_vnnmap_nodes(struct ctdb_context *ctdb,
3564 struct ctdb_vnn_map *vnn_map,
3565 TALLOC_CTX *mem_ctx,
3566 bool include_self)
3568 int i, j, num_nodes;
3569 uint32_t *nodes;
3571 for (i=num_nodes=0;i<vnn_map->size;i++) {
3572 if (vnn_map->map[i] == ctdb->pnn && !include_self) {
3573 continue;
3575 num_nodes++;
3578 nodes = talloc_array(mem_ctx, uint32_t, num_nodes);
3579 CTDB_NO_MEMORY_FATAL(ctdb, nodes);
3581 for (i=j=0;i<vnn_map->size;i++) {
3582 if (vnn_map->map[i] == ctdb->pnn && !include_self) {
3583 continue;
3585 nodes[j++] = vnn_map->map[i];
3588 return nodes;
3591 /* Get list of nodes not including those with flags specified by mask.
3592 * If exclude_pnn is not -1 then exclude that pnn from the list.
3594 uint32_t *list_of_nodes(struct ctdb_context *ctdb,
3595 struct ctdb_node_map *node_map,
3596 TALLOC_CTX *mem_ctx,
3597 uint32_t mask,
3598 int exclude_pnn)
3600 int i, j, num_nodes;
3601 uint32_t *nodes;
3603 for (i=num_nodes=0;i<node_map->num;i++) {
3604 if (node_map->nodes[i].flags & mask) {
3605 continue;
3607 if (node_map->nodes[i].pnn == exclude_pnn) {
3608 continue;
3610 num_nodes++;
3613 nodes = talloc_array(mem_ctx, uint32_t, num_nodes);
3614 CTDB_NO_MEMORY_FATAL(ctdb, nodes);
3616 for (i=j=0;i<node_map->num;i++) {
3617 if (node_map->nodes[i].flags & mask) {
3618 continue;
3620 if (node_map->nodes[i].pnn == exclude_pnn) {
3621 continue;
3623 nodes[j++] = node_map->nodes[i].pnn;
3626 return nodes;
3629 uint32_t *list_of_active_nodes(struct ctdb_context *ctdb,
3630 struct ctdb_node_map *node_map,
3631 TALLOC_CTX *mem_ctx,
3632 bool include_self)
3634 return list_of_nodes(ctdb, node_map, mem_ctx, NODE_FLAGS_INACTIVE,
3635 include_self ? -1 : ctdb->pnn);
3638 uint32_t *list_of_connected_nodes(struct ctdb_context *ctdb,
3639 struct ctdb_node_map *node_map,
3640 TALLOC_CTX *mem_ctx,
3641 bool include_self)
3643 return list_of_nodes(ctdb, node_map, mem_ctx, NODE_FLAGS_DISCONNECTED,
3644 include_self ? -1 : ctdb->pnn);
3648 this is used to test if a pnn lock exists and if it exists will return
3649 the number of connections that pnn has reported or -1 if that recovery
3650 daemon is not running.
3653 ctdb_read_pnn_lock(int fd, int32_t pnn)
3655 struct flock lock;
3656 char c;
3658 lock.l_type = F_WRLCK;
3659 lock.l_whence = SEEK_SET;
3660 lock.l_start = pnn;
3661 lock.l_len = 1;
3662 lock.l_pid = 0;
3664 if (fcntl(fd, F_GETLK, &lock) != 0) {
3665 DEBUG(DEBUG_ERR, (__location__ " F_GETLK failed with %s\n", strerror(errno)));
3666 return -1;
3669 if (lock.l_type == F_UNLCK) {
3670 return -1;
3673 if (pread(fd, &c, 1, pnn) == -1) {
3674 DEBUG(DEBUG_CRIT,(__location__ " failed read pnn count - %s\n", strerror(errno)));
3675 return -1;
3678 return c;
3682 get capabilities of a remote node
3684 struct ctdb_client_control_state *
3685 ctdb_ctrl_getcapabilities_send(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct timeval timeout, uint32_t destnode)
3687 return ctdb_control_send(ctdb, destnode, 0,
3688 CTDB_CONTROL_GET_CAPABILITIES, 0, tdb_null,
3689 mem_ctx, &timeout, NULL);
3692 int ctdb_ctrl_getcapabilities_recv(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct ctdb_client_control_state *state, uint32_t *capabilities)
3694 int ret;
3695 int32_t res;
3696 TDB_DATA outdata;
3698 ret = ctdb_control_recv(ctdb, state, mem_ctx, &outdata, &res, NULL);
3699 if ( (ret != 0) || (res != 0) ) {
3700 DEBUG(DEBUG_ERR,(__location__ " ctdb_ctrl_getcapabilities_recv failed\n"));
3701 return -1;
3704 if (capabilities) {
3705 *capabilities = *((uint32_t *)outdata.dptr);
3708 return 0;
3711 int ctdb_ctrl_getcapabilities(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t *capabilities)
3713 struct ctdb_client_control_state *state;
3714 TALLOC_CTX *tmp_ctx = talloc_new(NULL);
3715 int ret;
3717 state = ctdb_ctrl_getcapabilities_send(ctdb, tmp_ctx, timeout, destnode);
3718 ret = ctdb_ctrl_getcapabilities_recv(ctdb, tmp_ctx, state, capabilities);
3719 talloc_free(tmp_ctx);
3720 return ret;
3723 static void get_capabilities_callback(struct ctdb_context *ctdb,
3724 uint32_t node_pnn, int32_t res,
3725 TDB_DATA outdata, void *callback_data)
3727 struct ctdb_node_capabilities *caps =
3728 talloc_get_type(callback_data,
3729 struct ctdb_node_capabilities);
3731 if ( (outdata.dsize != sizeof(uint32_t)) || (outdata.dptr == NULL) ) {
3732 DEBUG(DEBUG_ERR, (__location__ " Invalid length/pointer for getcap callback : %u %p\n", (unsigned)outdata.dsize, outdata.dptr));
3733 return;
3736 if (node_pnn >= talloc_array_length(caps)) {
3737 DEBUG(DEBUG_ERR,
3738 (__location__ " unexpected PNN %u\n", node_pnn));
3739 return;
3742 caps[node_pnn].retrieved = true;
3743 caps[node_pnn].capabilities = *((uint32_t *)outdata.dptr);
3746 struct ctdb_node_capabilities *
3747 ctdb_get_capabilities(struct ctdb_context *ctdb,
3748 TALLOC_CTX *mem_ctx,
3749 struct timeval timeout,
3750 struct ctdb_node_map *nodemap)
3752 uint32_t *nodes;
3753 uint32_t i, res;
3754 struct ctdb_node_capabilities *ret;
3756 nodes = list_of_connected_nodes(ctdb, nodemap, mem_ctx, true);
3758 ret = talloc_array(mem_ctx, struct ctdb_node_capabilities,
3759 nodemap->num);
3760 CTDB_NO_MEMORY_NULL(ctdb, ret);
3761 /* Prepopulate the expected PNNs */
3762 for (i = 0; i < talloc_array_length(ret); i++) {
3763 ret[i].retrieved = false;
3766 res = ctdb_client_async_control(ctdb, CTDB_CONTROL_GET_CAPABILITIES,
3767 nodes, 0, timeout,
3768 false, tdb_null,
3769 get_capabilities_callback, NULL,
3770 ret);
3771 if (res != 0) {
3772 DEBUG(DEBUG_ERR,
3773 (__location__ " Failed to read node capabilities.\n"));
3774 TALLOC_FREE(ret);
3777 return ret;
3780 uint32_t *
3781 ctdb_get_node_capabilities(struct ctdb_node_capabilities *caps,
3782 uint32_t pnn)
3784 if (pnn < talloc_array_length(caps) && caps[pnn].retrieved) {
3785 return &caps[pnn].capabilities;
3788 return NULL;
3791 bool ctdb_node_has_capabilities(struct ctdb_node_capabilities *caps,
3792 uint32_t pnn,
3793 uint32_t capabilities_required)
3795 uint32_t *capp = ctdb_get_node_capabilities(caps, pnn);
3796 return (capp != NULL) &&
3797 ((*capp & capabilities_required) == capabilities_required);
/* Identity of a lock holder as stored in g_lock records. */
struct server_id {
	uint64_t pid;		/* process id of the holder */
	uint32_t task_id;	/* request id used by the holder */
	uint32_t vnn;		/* PNN of the node the holder runs on */
	uint64_t unique_id;	/* (vnn << 32) | task_id */
};
3808 static struct server_id server_id_fetch(struct ctdb_context *ctdb, uint32_t reqid)
3810 struct server_id id;
3812 id.pid = getpid();
3813 id.task_id = reqid;
3814 id.vnn = ctdb_get_pnn(ctdb);
3815 id.unique_id = id.vnn;
3816 id.unique_id = (id.unique_id << 32) | reqid;
3818 return id;
3821 /* This is basically a copy from Samba's server_id.*. However, a
3822 * dependency chain stops us from using Samba's version, so use a
3823 * renamed copy until a better solution is found. */
3824 static bool ctdb_server_id_equal(struct server_id *id1, struct server_id *id2)
3826 if (id1->pid != id2->pid) {
3827 return false;
3830 if (id1->task_id != id2->task_id) {
3831 return false;
3834 if (id1->vnn != id2->vnn) {
3835 return false;
3838 if (id1->unique_id != id2->unique_id) {
3839 return false;
3842 return true;
3845 static bool server_id_exists(struct ctdb_context *ctdb, struct server_id *id)
3847 struct ctdb_server_id sid;
3848 int ret;
3849 uint32_t result = 0;
3851 sid.type = SERVER_TYPE_SAMBA;
3852 sid.pnn = id->vnn;
3853 sid.server_id = id->pid;
3855 ret = ctdb_ctrl_check_server_id(ctdb, timeval_current_ofs(3,0),
3856 id->vnn, &sid, &result);
3857 if (ret != 0) {
3858 /* If control times out, assume server_id exists. */
3859 return true;
3862 if (result) {
3863 return true;
3866 return false;
/* Kind of lock recorded in a g_lock entry. */
enum g_lock_type {
	G_LOCK_READ = 0,
	G_LOCK_WRITE = 1,
};
3875 struct g_lock_rec {
3876 enum g_lock_type type;
3877 struct server_id id;
/* Parsed form of a g_lock record: a counted array of holder entries. */
struct g_lock_recs {
	unsigned int num;		/* number of entries in lock[] */
	struct g_lock_rec *lock;	/* NULL when num == 0 */
};
3885 static bool g_lock_parse(TALLOC_CTX *mem_ctx, TDB_DATA data,
3886 struct g_lock_recs **locks)
3888 struct g_lock_recs *recs;
3890 recs = talloc_zero(mem_ctx, struct g_lock_recs);
3891 if (recs == NULL) {
3892 return false;
3895 if (data.dsize == 0) {
3896 goto done;
3899 if (data.dsize % sizeof(struct g_lock_rec) != 0) {
3900 DEBUG(DEBUG_ERR, (__location__ "invalid data size %lu in g_lock record\n",
3901 (unsigned long)data.dsize));
3902 talloc_free(recs);
3903 return false;
3906 recs->num = data.dsize / sizeof(struct g_lock_rec);
3907 recs->lock = talloc_memdup(mem_ctx, data.dptr, data.dsize);
3908 if (recs->lock == NULL) {
3909 talloc_free(recs);
3910 return false;
3913 done:
3914 if (locks != NULL) {
3915 *locks = recs;
3918 return true;
3922 static bool g_lock_lock(TALLOC_CTX *mem_ctx,
3923 struct ctdb_db_context *ctdb_db,
3924 const char *keyname, uint32_t reqid)
3926 TDB_DATA key, data;
3927 struct ctdb_record_handle *h;
3928 struct g_lock_recs *locks;
3929 struct server_id id;
3930 struct timeval t_start;
3931 int i;
3933 key.dptr = (uint8_t *)discard_const(keyname);
3934 key.dsize = strlen(keyname) + 1;
3936 t_start = timeval_current();
3938 again:
3939 /* Keep trying for an hour. */
3940 if (timeval_elapsed(&t_start) > 3600) {
3941 return false;
3944 h = ctdb_fetch_lock(ctdb_db, mem_ctx, key, &data);
3945 if (h == NULL) {
3946 return false;
3949 if (!g_lock_parse(h, data, &locks)) {
3950 DEBUG(DEBUG_ERR, ("g_lock: error parsing locks\n"));
3951 talloc_free(data.dptr);
3952 talloc_free(h);
3953 return false;
3956 talloc_free(data.dptr);
3958 id = server_id_fetch(ctdb_db->ctdb, reqid);
3960 i = 0;
3961 while (i < locks->num) {
3962 if (ctdb_server_id_equal(&locks->lock[i].id, &id)) {
3963 /* Internal error */
3964 talloc_free(h);
3965 return false;
3968 if (!server_id_exists(ctdb_db->ctdb, &locks->lock[i].id)) {
3969 if (i < locks->num-1) {
3970 locks->lock[i] = locks->lock[locks->num-1];
3972 locks->num--;
3973 continue;
3976 /* This entry is locked. */
3977 DEBUG(DEBUG_INFO, ("g_lock: lock already granted for "
3978 "pid=0x%llx taskid=%x vnn=%d id=0x%llx\n",
3979 (unsigned long long)id.pid,
3980 id.task_id, id.vnn,
3981 (unsigned long long)id.unique_id));
3982 talloc_free(h);
3983 goto again;
3986 locks->lock = talloc_realloc(locks, locks->lock, struct g_lock_rec,
3987 locks->num+1);
3988 if (locks->lock == NULL) {
3989 talloc_free(h);
3990 return false;
3993 locks->lock[locks->num].type = G_LOCK_WRITE;
3994 locks->lock[locks->num].id = id;
3995 locks->num++;
3997 data.dptr = (uint8_t *)locks->lock;
3998 data.dsize = locks->num * sizeof(struct g_lock_rec);
4000 if (ctdb_record_store(h, data) != 0) {
4001 DEBUG(DEBUG_ERR, ("g_lock: failed to write transaction lock for "
4002 "pid=0x%llx taskid=%x vnn=%d id=0x%llx\n",
4003 (unsigned long long)id.pid,
4004 id.task_id, id.vnn,
4005 (unsigned long long)id.unique_id));
4006 talloc_free(h);
4007 return false;
4010 DEBUG(DEBUG_INFO, ("g_lock: lock granted for "
4011 "pid=0x%llx taskid=%x vnn=%d id=0x%llx\n",
4012 (unsigned long long)id.pid,
4013 id.task_id, id.vnn,
4014 (unsigned long long)id.unique_id));
4016 talloc_free(h);
4017 return true;
4020 static bool g_lock_unlock(TALLOC_CTX *mem_ctx,
4021 struct ctdb_db_context *ctdb_db,
4022 const char *keyname, uint32_t reqid)
4024 TDB_DATA key, data;
4025 struct ctdb_record_handle *h;
4026 struct g_lock_recs *locks;
4027 struct server_id id;
4028 int i;
4029 bool found = false;
4031 key.dptr = (uint8_t *)discard_const(keyname);
4032 key.dsize = strlen(keyname) + 1;
4033 h = ctdb_fetch_lock(ctdb_db, mem_ctx, key, &data);
4034 if (h == NULL) {
4035 return false;
4038 if (!g_lock_parse(h, data, &locks)) {
4039 DEBUG(DEBUG_ERR, ("g_lock: error parsing locks\n"));
4040 talloc_free(data.dptr);
4041 talloc_free(h);
4042 return false;
4045 talloc_free(data.dptr);
4047 id = server_id_fetch(ctdb_db->ctdb, reqid);
4049 for (i=0; i<locks->num; i++) {
4050 if (ctdb_server_id_equal(&locks->lock[i].id, &id)) {
4051 if (i < locks->num-1) {
4052 locks->lock[i] = locks->lock[locks->num-1];
4054 locks->num--;
4055 found = true;
4056 break;
4060 if (!found) {
4061 DEBUG(DEBUG_ERR, ("g_lock: lock not found\n"));
4062 talloc_free(h);
4063 return false;
4066 data.dptr = (uint8_t *)locks->lock;
4067 data.dsize = locks->num * sizeof(struct g_lock_rec);
4069 if (ctdb_record_store(h, data) != 0) {
4070 talloc_free(h);
4071 return false;
4074 talloc_free(h);
4075 return true;
/* Client-side state of one open transaction. */
struct ctdb_transaction_handle {
	/* database the transaction operates on */
	struct ctdb_db_context *ctdb_db;
	/* attached "g_lock.tdb" database holding the transaction lock */
	struct ctdb_db_context *g_lock_db;
	/* key of the lock record, "transaction_db_0x<db_id>" */
	char *lock_name;
	/* request id allocated for this handle */
	uint32_t reqid;
	/*
	 * we store reads and writes done under a transaction:
	 * - one list stores both reads and writes (m_all)
	 * - the other just writes (m_write)
	 */
	struct ctdb_marshall_buffer *m_all;
	struct ctdb_marshall_buffer *m_write;
};
4093 static int ctdb_transaction_destructor(struct ctdb_transaction_handle *h)
4095 g_lock_unlock(h, h->g_lock_db, h->lock_name, h->reqid);
4096 ctdb_reqid_remove(h->ctdb_db->ctdb, h->reqid);
4097 return 0;
4102 * start a transaction on a database
4104 struct ctdb_transaction_handle *ctdb_transaction_start(struct ctdb_db_context *ctdb_db,
4105 TALLOC_CTX *mem_ctx)
4107 struct ctdb_transaction_handle *h;
4108 struct ctdb_server_id id;
4110 h = talloc_zero(mem_ctx, struct ctdb_transaction_handle);
4111 if (h == NULL) {
4112 DEBUG(DEBUG_ERR, (__location__ " memory allocation error\n"));
4113 return NULL;
4116 h->ctdb_db = ctdb_db;
4117 h->lock_name = talloc_asprintf(h, "transaction_db_0x%08x",
4118 (unsigned int)ctdb_db->db_id);
4119 if (h->lock_name == NULL) {
4120 DEBUG(DEBUG_ERR, (__location__ " talloc asprintf failed\n"));
4121 talloc_free(h);
4122 return NULL;
4125 h->g_lock_db = ctdb_attach(h->ctdb_db->ctdb, timeval_current_ofs(3,0),
4126 "g_lock.tdb", false, 0);
4127 if (!h->g_lock_db) {
4128 DEBUG(DEBUG_ERR, (__location__ " unable to attach to g_lock.tdb\n"));
4129 talloc_free(h);
4130 return NULL;
4133 id.type = SERVER_TYPE_SAMBA;
4134 id.pnn = ctdb_get_pnn(ctdb_db->ctdb);
4135 id.server_id = getpid();
4137 if (ctdb_ctrl_register_server_id(ctdb_db->ctdb, timeval_current_ofs(3,0),
4138 &id) != 0) {
4139 DEBUG(DEBUG_ERR, (__location__ " unable to register server id\n"));
4140 talloc_free(h);
4141 return NULL;
4144 h->reqid = ctdb_reqid_new(h->ctdb_db->ctdb, h);
4146 if (!g_lock_lock(h, h->g_lock_db, h->lock_name, h->reqid)) {
4147 DEBUG(DEBUG_ERR, (__location__ " Error locking g_lock.tdb\n"));
4148 talloc_free(h);
4149 return NULL;
4152 talloc_set_destructor(h, ctdb_transaction_destructor);
4153 return h;
4157 * fetch a record inside a transaction
4159 int ctdb_transaction_fetch(struct ctdb_transaction_handle *h,
4160 TALLOC_CTX *mem_ctx,
4161 TDB_DATA key, TDB_DATA *data)
4163 struct ctdb_ltdb_header header;
4164 int ret;
4166 ZERO_STRUCT(header);
4168 ret = ctdb_ltdb_fetch(h->ctdb_db, key, &header, mem_ctx, data);
4169 if (ret == -1 && header.dmaster == (uint32_t)-1) {
4170 /* record doesn't exist yet */
4171 *data = tdb_null;
4172 ret = 0;
4175 if (ret != 0) {
4176 return ret;
4179 h->m_all = ctdb_marshall_add(h, h->m_all, h->ctdb_db->db_id, 1, key, NULL, *data);
4180 if (h->m_all == NULL) {
4181 DEBUG(DEBUG_ERR,(__location__ " Failed to add to marshalling record\n"));
4182 return -1;
4185 return 0;
4189 * stores a record inside a transaction
4191 int ctdb_transaction_store(struct ctdb_transaction_handle *h,
4192 TDB_DATA key, TDB_DATA data)
4194 TALLOC_CTX *tmp_ctx = talloc_new(h);
4195 struct ctdb_ltdb_header header;
4196 TDB_DATA olddata;
4197 int ret;
4199 /* we need the header so we can update the RSN */
4200 ret = ctdb_ltdb_fetch(h->ctdb_db, key, &header, tmp_ctx, &olddata);
4201 if (ret == -1 && header.dmaster == (uint32_t)-1) {
4202 /* the record doesn't exist - create one with us as dmaster.
4203 This is only safe because we are in a transaction and this
4204 is a persistent database */
4205 ZERO_STRUCT(header);
4206 } else if (ret != 0) {
4207 DEBUG(DEBUG_ERR,(__location__ " Failed to fetch record\n"));
4208 talloc_free(tmp_ctx);
4209 return ret;
4212 if (data.dsize == olddata.dsize &&
4213 memcmp(data.dptr, olddata.dptr, data.dsize) == 0 &&
4214 header.rsn != 0) {
4215 /* save writing the same data */
4216 talloc_free(tmp_ctx);
4217 return 0;
4220 header.dmaster = h->ctdb_db->ctdb->pnn;
4221 header.rsn++;
4223 h->m_all = ctdb_marshall_add(h, h->m_all, h->ctdb_db->db_id, 0, key, NULL, data);
4224 if (h->m_all == NULL) {
4225 DEBUG(DEBUG_ERR,(__location__ " Failed to add to marshalling record\n"));
4226 talloc_free(tmp_ctx);
4227 return -1;
4230 h->m_write = ctdb_marshall_add(h, h->m_write, h->ctdb_db->db_id, 0, key, &header, data);
4231 if (h->m_write == NULL) {
4232 DEBUG(DEBUG_ERR,(__location__ " Failed to add to marshalling record\n"));
4233 talloc_free(tmp_ctx);
4234 return -1;
4237 talloc_free(tmp_ctx);
4238 return 0;
4241 static int ctdb_fetch_db_seqnum(struct ctdb_db_context *ctdb_db, uint64_t *seqnum)
4243 const char *keyname = CTDB_DB_SEQNUM_KEY;
4244 TDB_DATA key, data;
4245 struct ctdb_ltdb_header header;
4246 int ret;
4248 key.dptr = (uint8_t *)discard_const(keyname);
4249 key.dsize = strlen(keyname) + 1;
4251 ret = ctdb_ltdb_fetch(ctdb_db, key, &header, ctdb_db, &data);
4252 if (ret != 0) {
4253 *seqnum = 0;
4254 return 0;
4257 if (data.dsize == 0) {
4258 *seqnum = 0;
4259 return 0;
4262 if (data.dsize != sizeof(*seqnum)) {
4263 DEBUG(DEBUG_ERR, (__location__ " Invalid data recived len=%zi\n",
4264 data.dsize));
4265 talloc_free(data.dptr);
4266 return -1;
4269 *seqnum = *(uint64_t *)data.dptr;
4270 talloc_free(data.dptr);
4272 return 0;
4276 static int ctdb_store_db_seqnum(struct ctdb_transaction_handle *h,
4277 uint64_t seqnum)
4279 const char *keyname = CTDB_DB_SEQNUM_KEY;
4280 TDB_DATA key, data;
4282 key.dptr = (uint8_t *)discard_const(keyname);
4283 key.dsize = strlen(keyname) + 1;
4285 data.dptr = (uint8_t *)&seqnum;
4286 data.dsize = sizeof(seqnum);
4288 return ctdb_transaction_store(h, key, data);
4293 * commit a transaction
4295 int ctdb_transaction_commit(struct ctdb_transaction_handle *h)
4297 int ret;
4298 uint64_t old_seqnum, new_seqnum;
4299 int32_t status;
4300 struct timeval timeout;
4302 if (h->m_write == NULL) {
4303 /* no changes were made */
4304 talloc_free(h);
4305 return 0;
4308 ret = ctdb_fetch_db_seqnum(h->ctdb_db, &old_seqnum);
4309 if (ret != 0) {
4310 DEBUG(DEBUG_ERR, (__location__ " failed to fetch db sequence number\n"));
4311 ret = -1;
4312 goto done;
4315 new_seqnum = old_seqnum + 1;
4316 ret = ctdb_store_db_seqnum(h, new_seqnum);
4317 if (ret != 0) {
4318 DEBUG(DEBUG_ERR, (__location__ " failed to store db sequence number\n"));
4319 ret = -1;
4320 goto done;
4323 again:
4324 timeout = timeval_current_ofs(3,0);
4325 ret = ctdb_control(h->ctdb_db->ctdb, CTDB_CURRENT_NODE,
4326 h->ctdb_db->db_id,
4327 CTDB_CONTROL_TRANS3_COMMIT, 0,
4328 ctdb_marshall_finish(h->m_write), NULL, NULL,
4329 &status, &timeout, NULL);
4330 if (ret != 0 || status != 0) {
4332 * TRANS3_COMMIT control will only fail if recovery has been
4333 * triggered. Check if the database has been updated or not.
4335 ret = ctdb_fetch_db_seqnum(h->ctdb_db, &new_seqnum);
4336 if (ret != 0) {
4337 DEBUG(DEBUG_ERR, (__location__ " failed to fetch db sequence number\n"));
4338 goto done;
4341 if (new_seqnum == old_seqnum) {
4342 /* Database not yet updated, try again */
4343 goto again;
4346 if (new_seqnum != (old_seqnum + 1)) {
4347 DEBUG(DEBUG_ERR, (__location__ " new seqnum [%llu] != old seqnum [%llu] + 1\n",
4348 (long long unsigned)new_seqnum,
4349 (long long unsigned)old_seqnum));
4350 ret = -1;
4351 goto done;
4355 ret = 0;
4357 done:
4358 talloc_free(h);
4359 return ret;
4363 * cancel a transaction
/*
  Cancel a transaction by freeing its handle.  Always returns 0.
 */
int ctdb_transaction_cancel(struct ctdb_transaction_handle *h)
{
	const int rc = 0;

	talloc_free(h);

	return rc;
}
4373 recovery daemon ping to main daemon
4375 int ctdb_ctrl_recd_ping(struct ctdb_context *ctdb)
4377 int ret;
4378 int32_t res;
4380 ret = ctdb_control(ctdb, CTDB_CURRENT_NODE, 0, CTDB_CONTROL_RECD_PING, 0, tdb_null,
4381 ctdb, NULL, &res, NULL, NULL);
4382 if (ret != 0 || res != 0) {
4383 DEBUG(DEBUG_ERR,("Failed to send recd ping\n"));
4384 return -1;
4387 return 0;
4390 /* When forking the main daemon and the child process needs to connect
4391 * back to the daemon as a client process, this function can be used
4392 * to change the ctdb context from daemon into client mode. The child
4393 * process must be created using ctdb_fork() and not fork() -
4394 * ctdb_fork() does some necessary housekeeping.
4396 int switch_from_server_to_client(struct ctdb_context *ctdb, const char *fmt, ...)
4398 int ret;
4399 va_list ap;
4401 /* Add extra information so we can identify this in the logs */
4402 va_start(ap, fmt);
4403 debug_extra = talloc_strdup_append(talloc_vasprintf(NULL, fmt, ap), ":");
4404 va_end(ap);
4406 /* get a new event context */
4407 ctdb->ev = event_context_init(ctdb);
4408 tevent_loop_allow_nesting(ctdb->ev);
4410 /* Connect to main CTDB daemon */
4411 ret = ctdb_socket_connect(ctdb);
4412 if (ret != 0) {
4413 DEBUG(DEBUG_ALERT, (__location__ " Failed to init ctdb client\n"));
4414 return -1;
4417 ctdb->can_send_controls = true;
4419 return 0;
4423 get the status of running the monitor eventscripts: NULL means never run.
4425 int ctdb_ctrl_getscriptstatus(struct ctdb_context *ctdb,
4426 struct timeval timeout, uint32_t destnode,
4427 TALLOC_CTX *mem_ctx, enum ctdb_eventscript_call type,
4428 struct ctdb_scripts_wire **scripts)
4430 int ret;
4431 TDB_DATA outdata, indata;
4432 int32_t res;
4433 uint32_t uinttype = type;
4435 indata.dptr = (uint8_t *)&uinttype;
4436 indata.dsize = sizeof(uinttype);
4438 ret = ctdb_control(ctdb, destnode, 0,
4439 CTDB_CONTROL_GET_EVENT_SCRIPT_STATUS, 0, indata,
4440 mem_ctx, &outdata, &res, &timeout, NULL);
4441 if (ret != 0 || res != 0) {
4442 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getscriptstatus failed ret:%d res:%d\n", ret, res));
4443 return -1;
4446 if (outdata.dsize == 0) {
4447 *scripts = NULL;
4448 } else {
4449 *scripts = (struct ctdb_scripts_wire *)talloc_memdup(mem_ctx, outdata.dptr, outdata.dsize);
4450 talloc_free(outdata.dptr);
4453 return 0;
4457 tell the main daemon how long it took to lock the reclock file
4459 int ctdb_ctrl_report_recd_lock_latency(struct ctdb_context *ctdb, struct timeval timeout, double latency)
4461 int ret;
4462 int32_t res;
4463 TDB_DATA data;
4465 data.dptr = (uint8_t *)&latency;
4466 data.dsize = sizeof(latency);
4468 ret = ctdb_control(ctdb, CTDB_CURRENT_NODE, 0, CTDB_CONTROL_RECD_RECLOCK_LATENCY, 0, data,
4469 ctdb, NULL, &res, NULL, NULL);
4470 if (ret != 0 || res != 0) {
4471 DEBUG(DEBUG_ERR,("Failed to send recd reclock latency\n"));
4472 return -1;
4475 return 0;
4479 get the name of the reclock file
4481 int ctdb_ctrl_getreclock(struct ctdb_context *ctdb, struct timeval timeout,
4482 uint32_t destnode, TALLOC_CTX *mem_ctx,
4483 const char **name)
4485 int ret;
4486 int32_t res;
4487 TDB_DATA data;
4489 ret = ctdb_control(ctdb, destnode, 0,
4490 CTDB_CONTROL_GET_RECLOCK_FILE, 0, tdb_null,
4491 mem_ctx, &data, &res, &timeout, NULL);
4492 if (ret != 0 || res != 0) {
4493 return -1;
4496 if (data.dsize == 0) {
4497 *name = NULL;
4498 } else {
4499 *name = talloc_strdup(mem_ctx, discard_const(data.dptr));
4501 talloc_free(data.dptr);
4503 return 0;
4507 set the reclock filename for a node
4509 int ctdb_ctrl_setreclock(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, const char *reclock)
4511 int ret;
4512 TDB_DATA data;
4513 int32_t res;
4515 if (reclock == NULL) {
4516 data.dsize = 0;
4517 data.dptr = NULL;
4518 } else {
4519 data.dsize = strlen(reclock) + 1;
4520 data.dptr = discard_const(reclock);
4523 ret = ctdb_control(ctdb, destnode, 0,
4524 CTDB_CONTROL_SET_RECLOCK_FILE, 0, data,
4525 NULL, NULL, &res, &timeout, NULL);
4526 if (ret != 0 || res != 0) {
4527 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for setreclock failed\n"));
4528 return -1;
4531 return 0;
4535 stop a node
4537 int ctdb_ctrl_stop_node(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode)
4539 int ret;
4540 int32_t res;
4542 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_STOP_NODE, 0, tdb_null,
4543 ctdb, NULL, &res, &timeout, NULL);
4544 if (ret != 0 || res != 0) {
4545 DEBUG(DEBUG_ERR,("Failed to stop node\n"));
4546 return -1;
4549 return 0;
4553 continue a node
4555 int ctdb_ctrl_continue_node(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode)
4557 int ret;
4559 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_CONTINUE_NODE, 0, tdb_null,
4560 ctdb, NULL, NULL, &timeout, NULL);
4561 if (ret != 0) {
4562 DEBUG(DEBUG_ERR,("Failed to continue node\n"));
4563 return -1;
4566 return 0;
4570 set the natgw state for a node
4572 int ctdb_ctrl_setnatgwstate(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t natgwstate)
4574 int ret;
4575 TDB_DATA data;
4576 int32_t res;
4578 data.dsize = sizeof(natgwstate);
4579 data.dptr = (uint8_t *)&natgwstate;
4581 ret = ctdb_control(ctdb, destnode, 0,
4582 CTDB_CONTROL_SET_NATGWSTATE, 0, data,
4583 NULL, NULL, &res, &timeout, NULL);
4584 if (ret != 0 || res != 0) {
4585 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for setnatgwstate failed\n"));
4586 return -1;
4589 return 0;
4593 set the lmaster role for a node
4595 int ctdb_ctrl_setlmasterrole(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t lmasterrole)
4597 int ret;
4598 TDB_DATA data;
4599 int32_t res;
4601 data.dsize = sizeof(lmasterrole);
4602 data.dptr = (uint8_t *)&lmasterrole;
4604 ret = ctdb_control(ctdb, destnode, 0,
4605 CTDB_CONTROL_SET_LMASTERROLE, 0, data,
4606 NULL, NULL, &res, &timeout, NULL);
4607 if (ret != 0 || res != 0) {
4608 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for setlmasterrole failed\n"));
4609 return -1;
4612 return 0;
4616 set the recmaster role for a node
4618 int ctdb_ctrl_setrecmasterrole(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t recmasterrole)
4620 int ret;
4621 TDB_DATA data;
4622 int32_t res;
4624 data.dsize = sizeof(recmasterrole);
4625 data.dptr = (uint8_t *)&recmasterrole;
4627 ret = ctdb_control(ctdb, destnode, 0,
4628 CTDB_CONTROL_SET_RECMASTERROLE, 0, data,
4629 NULL, NULL, &res, &timeout, NULL);
4630 if (ret != 0 || res != 0) {
4631 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for setrecmasterrole failed\n"));
4632 return -1;
4635 return 0;
4638 /* enable an eventscript
4640 int ctdb_ctrl_enablescript(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, const char *script)
4642 int ret;
4643 TDB_DATA data;
4644 int32_t res;
4646 data.dsize = strlen(script) + 1;
4647 data.dptr = discard_const(script);
4649 ret = ctdb_control(ctdb, destnode, 0,
4650 CTDB_CONTROL_ENABLE_SCRIPT, 0, data,
4651 NULL, NULL, &res, &timeout, NULL);
4652 if (ret != 0 || res != 0) {
4653 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for enablescript failed\n"));
4654 return -1;
4657 return 0;
4660 /* disable an eventscript
4662 int ctdb_ctrl_disablescript(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, const char *script)
4664 int ret;
4665 TDB_DATA data;
4666 int32_t res;
4668 data.dsize = strlen(script) + 1;
4669 data.dptr = discard_const(script);
4671 ret = ctdb_control(ctdb, destnode, 0,
4672 CTDB_CONTROL_DISABLE_SCRIPT, 0, data,
4673 NULL, NULL, &res, &timeout, NULL);
4674 if (ret != 0 || res != 0) {
4675 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for disablescript failed\n"));
4676 return -1;
4679 return 0;
4683 int ctdb_ctrl_set_ban(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, struct ctdb_ban_time *bantime)
4685 int ret;
4686 TDB_DATA data;
4687 int32_t res;
4689 data.dsize = sizeof(*bantime);
4690 data.dptr = (uint8_t *)bantime;
4692 ret = ctdb_control(ctdb, destnode, 0,
4693 CTDB_CONTROL_SET_BAN_STATE, 0, data,
4694 NULL, NULL, &res, &timeout, NULL);
4695 if (ret != 0 || res != 0) {
4696 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set ban state failed\n"));
4697 return -1;
4700 return 0;
4704 int ctdb_ctrl_get_ban(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, TALLOC_CTX *mem_ctx, struct ctdb_ban_time **bantime)
4706 int ret;
4707 TDB_DATA outdata;
4708 int32_t res;
4709 TALLOC_CTX *tmp_ctx = talloc_new(NULL);
4711 ret = ctdb_control(ctdb, destnode, 0,
4712 CTDB_CONTROL_GET_BAN_STATE, 0, tdb_null,
4713 tmp_ctx, &outdata, &res, &timeout, NULL);
4714 if (ret != 0 || res != 0) {
4715 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set ban state failed\n"));
4716 talloc_free(tmp_ctx);
4717 return -1;
4720 *bantime = (struct ctdb_ban_time *)talloc_steal(mem_ctx, outdata.dptr);
4721 talloc_free(tmp_ctx);
4723 return 0;
4727 int ctdb_ctrl_set_db_priority(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, struct ctdb_db_priority *db_prio)
4729 int ret;
4730 int32_t res;
4731 TDB_DATA data;
4732 TALLOC_CTX *tmp_ctx = talloc_new(NULL);
4734 data.dptr = (uint8_t*)db_prio;
4735 data.dsize = sizeof(*db_prio);
4737 ret = ctdb_control(ctdb, destnode, 0,
4738 CTDB_CONTROL_SET_DB_PRIORITY, 0, data,
4739 tmp_ctx, NULL, &res, &timeout, NULL);
4740 if (ret != 0 || res != 0) {
4741 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set_db_priority failed\n"));
4742 talloc_free(tmp_ctx);
4743 return -1;
4746 talloc_free(tmp_ctx);
4748 return 0;
4751 int ctdb_ctrl_get_db_priority(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t db_id, uint32_t *priority)
4753 int ret;
4754 int32_t res;
4755 TDB_DATA data;
4756 TALLOC_CTX *tmp_ctx = talloc_new(NULL);
4758 data.dptr = (uint8_t*)&db_id;
4759 data.dsize = sizeof(db_id);
4761 ret = ctdb_control(ctdb, destnode, 0,
4762 CTDB_CONTROL_GET_DB_PRIORITY, 0, data,
4763 tmp_ctx, NULL, &res, &timeout, NULL);
4764 if (ret != 0 || res < 0) {
4765 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get_db_priority failed\n"));
4766 talloc_free(tmp_ctx);
4767 return -1;
4770 if (priority) {
4771 *priority = res;
4774 talloc_free(tmp_ctx);
4776 return 0;
4779 int ctdb_ctrl_getstathistory(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, TALLOC_CTX *mem_ctx, struct ctdb_statistics_wire **stats)
4781 int ret;
4782 TDB_DATA outdata;
4783 int32_t res;
4785 ret = ctdb_control(ctdb, destnode, 0,
4786 CTDB_CONTROL_GET_STAT_HISTORY, 0, tdb_null,
4787 mem_ctx, &outdata, &res, &timeout, NULL);
4788 if (ret != 0 || res != 0 || outdata.dsize == 0) {
4789 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getstathistory failed ret:%d res:%d\n", ret, res));
4790 return -1;
4793 *stats = (struct ctdb_statistics_wire *)talloc_memdup(mem_ctx, outdata.dptr, outdata.dsize);
4794 talloc_free(outdata.dptr);
4796 return 0;
4799 struct ctdb_ltdb_header *ctdb_header_from_record_handle(struct ctdb_record_handle *h)
4801 if (h == NULL) {
4802 return NULL;
4805 return &h->header;
4809 struct ctdb_client_control_state *
4810 ctdb_ctrl_updaterecord_send(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct timeval timeout, uint32_t destnode, struct ctdb_db_context *ctdb_db, TDB_DATA key, struct ctdb_ltdb_header *header, TDB_DATA data)
4812 struct ctdb_client_control_state *handle;
4813 struct ctdb_marshall_buffer *m;
4814 struct ctdb_rec_data *rec;
4815 TDB_DATA outdata;
4817 m = talloc_zero(mem_ctx, struct ctdb_marshall_buffer);
4818 if (m == NULL) {
4819 DEBUG(DEBUG_ERR, ("Failed to allocate marshall buffer for update record\n"));
4820 return NULL;
4823 m->db_id = ctdb_db->db_id;
4825 rec = ctdb_marshall_record(m, 0, key, header, data);
4826 if (rec == NULL) {
4827 DEBUG(DEBUG_ERR,("Failed to marshall record for update record\n"));
4828 talloc_free(m);
4829 return NULL;
4831 m = talloc_realloc_size(mem_ctx, m, rec->length + offsetof(struct ctdb_marshall_buffer, data));
4832 if (m == NULL) {
4833 DEBUG(DEBUG_CRIT,(__location__ " Failed to expand recdata\n"));
4834 talloc_free(m);
4835 return NULL;
4837 m->count++;
4838 memcpy((uint8_t *)m + offsetof(struct ctdb_marshall_buffer, data), rec, rec->length);
4841 outdata.dptr = (uint8_t *)m;
4842 outdata.dsize = talloc_get_size(m);
4844 handle = ctdb_control_send(ctdb, destnode, 0,
4845 CTDB_CONTROL_UPDATE_RECORD, 0, outdata,
4846 mem_ctx, &timeout, NULL);
4847 talloc_free(m);
4848 return handle;
4851 int ctdb_ctrl_updaterecord_recv(struct ctdb_context *ctdb, struct ctdb_client_control_state *state)
4853 int ret;
4854 int32_t res;
4856 ret = ctdb_control_recv(ctdb, state, state, NULL, &res, NULL);
4857 if ( (ret != 0) || (res != 0) ){
4858 DEBUG(DEBUG_ERR,(__location__ " ctdb_ctrl_update_record_recv failed\n"));
4859 return -1;
4862 return 0;
4866 ctdb_ctrl_updaterecord(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct timeval timeout, uint32_t destnode, struct ctdb_db_context *ctdb_db, TDB_DATA key, struct ctdb_ltdb_header *header, TDB_DATA data)
4868 struct ctdb_client_control_state *state;
4870 state = ctdb_ctrl_updaterecord_send(ctdb, mem_ctx, timeout, destnode, ctdb_db, key, header, data);
4871 return ctdb_ctrl_updaterecord_recv(ctdb, state);
4880 set a database to be readonly
4882 struct ctdb_client_control_state *
4883 ctdb_ctrl_set_db_readonly_send(struct ctdb_context *ctdb, uint32_t destnode, uint32_t dbid)
4885 TDB_DATA data;
4887 data.dptr = (uint8_t *)&dbid;
4888 data.dsize = sizeof(dbid);
4890 return ctdb_control_send(ctdb, destnode, 0,
4891 CTDB_CONTROL_SET_DB_READONLY, 0, data,
4892 ctdb, NULL, NULL);
4895 int ctdb_ctrl_set_db_readonly_recv(struct ctdb_context *ctdb, struct ctdb_client_control_state *state)
4897 int ret;
4898 int32_t res;
4900 ret = ctdb_control_recv(ctdb, state, ctdb, NULL, &res, NULL);
4901 if (ret != 0 || res != 0) {
4902 DEBUG(DEBUG_ERR,(__location__ " ctdb_ctrl_set_db_readonly_recv failed ret:%d res:%d\n", ret, res));
4903 return -1;
4906 return 0;
/*
  Synchronous SET_DB_READONLY: send the control and wait for its reply.
  Returns the result of ctdb_ctrl_set_db_readonly_recv().
 */
int ctdb_ctrl_set_db_readonly(struct ctdb_context *ctdb, uint32_t destnode, uint32_t dbid)
{
	struct ctdb_client_control_state *pending;
	int rc;

	pending = ctdb_ctrl_set_db_readonly_send(ctdb, destnode, dbid);
	rc = ctdb_ctrl_set_db_readonly_recv(ctdb, pending);

	return rc;
}
4918 set a database to be sticky
4920 struct ctdb_client_control_state *
4921 ctdb_ctrl_set_db_sticky_send(struct ctdb_context *ctdb, uint32_t destnode, uint32_t dbid)
4923 TDB_DATA data;
4925 data.dptr = (uint8_t *)&dbid;
4926 data.dsize = sizeof(dbid);
4928 return ctdb_control_send(ctdb, destnode, 0,
4929 CTDB_CONTROL_SET_DB_STICKY, 0, data,
4930 ctdb, NULL, NULL);
4933 int ctdb_ctrl_set_db_sticky_recv(struct ctdb_context *ctdb, struct ctdb_client_control_state *state)
4935 int ret;
4936 int32_t res;
4938 ret = ctdb_control_recv(ctdb, state, ctdb, NULL, &res, NULL);
4939 if (ret != 0 || res != 0) {
4940 DEBUG(DEBUG_ERR,(__location__ " ctdb_ctrl_set_db_sticky_recv failed ret:%d res:%d\n", ret, res));
4941 return -1;
4944 return 0;
/*
  Synchronous SET_DB_STICKY: send the control and wait for its reply.
  Returns the result of ctdb_ctrl_set_db_sticky_recv().
 */
int ctdb_ctrl_set_db_sticky(struct ctdb_context *ctdb, uint32_t destnode, uint32_t dbid)
{
	struct ctdb_client_control_state *pending;
	int rc;

	pending = ctdb_ctrl_set_db_sticky_send(ctdb, destnode, dbid);
	rc = ctdb_ctrl_set_db_sticky_recv(ctdb, pending);

	return rc;
}