ctdb-client: Talloc tdb_wrap off ctdb_db_context
[Samba.git] / ctdb / client / ctdb_client.c
blob73c593fee68ab4a04bd3bcfae778d3acaba5bba2
1 /*
2 ctdb daemon code
4 Copyright (C) Andrew Tridgell 2007
5 Copyright (C) Ronnie Sahlberg 2007
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, see <http://www.gnu.org/licenses/>.
21 #include "includes.h"
22 #include "db_wrap.h"
23 #include "tdb.h"
24 #include "lib/util/dlinklist.h"
25 #include "system/network.h"
26 #include "system/filesys.h"
27 #include "system/locale.h"
28 #include <stdlib.h>
29 #include "../include/ctdb_private.h"
30 #include "lib/util/dlinklist.h"
/* NOTE(review): presumably the process id of the ctdb daemon; it is neither
   assigned nor read in the visible part of this file - confirm against its users. */
32 pid_t ctdbd_pid;
35 allocate a packet for use in client<->daemon communication
37 struct ctdb_req_header *_ctdbd_allocate_pkt(struct ctdb_context *ctdb,
38 TALLOC_CTX *mem_ctx,
39 enum ctdb_operation operation,
40 size_t length, size_t slength,
41 const char *type)
43 int size;
44 struct ctdb_req_header *hdr;
46 length = MAX(length, slength);
47 size = (length+(CTDB_DS_ALIGNMENT-1)) & ~(CTDB_DS_ALIGNMENT-1);
49 hdr = (struct ctdb_req_header *)talloc_zero_size(mem_ctx, size);
50 if (hdr == NULL) {
51 DEBUG(DEBUG_ERR,("Unable to allocate packet for operation %u of length %u\n",
52 operation, (unsigned)length));
53 return NULL;
55 talloc_set_name_const(hdr, type);
56 hdr->length = length;
57 hdr->operation = operation;
58 hdr->ctdb_magic = CTDB_MAGIC;
59 hdr->ctdb_version = CTDB_VERSION;
60 hdr->srcnode = ctdb->pnn;
61 if (ctdb->vnn_map) {
62 hdr->generation = ctdb->vnn_map->generation;
65 return hdr;
69 local version of ctdb_call
71 int ctdb_call_local(struct ctdb_db_context *ctdb_db, struct ctdb_call *call,
72 struct ctdb_ltdb_header *header, TALLOC_CTX *mem_ctx,
73 TDB_DATA *data, bool updatetdb)
75 struct ctdb_call_info *c;
76 struct ctdb_registered_call *fn;
77 struct ctdb_context *ctdb = ctdb_db->ctdb;
79 c = talloc(ctdb, struct ctdb_call_info);
80 CTDB_NO_MEMORY(ctdb, c);
82 c->key = call->key;
83 c->call_data = &call->call_data;
84 c->record_data.dptr = talloc_memdup(c, data->dptr, data->dsize);
85 c->record_data.dsize = data->dsize;
86 CTDB_NO_MEMORY(ctdb, c->record_data.dptr);
87 c->new_data = NULL;
88 c->reply_data = NULL;
89 c->status = 0;
90 c->header = header;
92 for (fn=ctdb_db->calls;fn;fn=fn->next) {
93 if (fn->id == call->call_id) break;
95 if (fn == NULL) {
96 ctdb_set_error(ctdb, "Unknown call id %u\n", call->call_id);
97 talloc_free(c);
98 return -1;
101 if (fn->fn(c) != 0) {
102 ctdb_set_error(ctdb, "ctdb_call %u failed\n", call->call_id);
103 talloc_free(c);
104 return -1;
107 /* we need to force the record to be written out if this was a remote access */
108 if (c->new_data == NULL) {
109 c->new_data = &c->record_data;
112 if (c->new_data && updatetdb) {
113 /* XXX check that we always have the lock here? */
114 if (ctdb_ltdb_store(ctdb_db, call->key, header, *c->new_data) != 0) {
115 ctdb_set_error(ctdb, "ctdb_call tdb_store failed\n");
116 talloc_free(c);
117 return -1;
121 if (c->reply_data) {
122 call->reply_data = *c->reply_data;
124 talloc_steal(call, call->reply_data.dptr);
125 talloc_set_name_const(call->reply_data.dptr, __location__);
126 } else {
127 call->reply_data.dptr = NULL;
128 call->reply_data.dsize = 0;
130 call->status = c->status;
132 talloc_free(c);
134 return 0;
139 queue a packet for sending from client to daemon
141 static int ctdb_client_queue_pkt(struct ctdb_context *ctdb, struct ctdb_req_header *hdr)
143 return ctdb_queue_send(ctdb->daemon.queue, (uint8_t *)hdr, hdr->length);
148 called when a CTDB_REPLY_CALL packet comes in in the client
150 This packet comes in response to a CTDB_REQ_CALL request packet. It
151 contains any reply data from the call
153 static void ctdb_client_reply_call(struct ctdb_context *ctdb, struct ctdb_req_header *hdr)
155 struct ctdb_reply_call *c = (struct ctdb_reply_call *)hdr;
156 struct ctdb_client_call_state *state;
158 state = ctdb_reqid_find(ctdb, hdr->reqid, struct ctdb_client_call_state);
159 if (state == NULL) {
160 DEBUG(DEBUG_ERR,(__location__ " reqid %u not found\n", hdr->reqid));
161 return;
164 if (hdr->reqid != state->reqid) {
165 /* we found a record but it was the wrong one */
166 DEBUG(DEBUG_ERR, ("Dropped client call reply with reqid:%u\n",hdr->reqid));
167 return;
170 state->call->reply_data.dptr = c->data;
171 state->call->reply_data.dsize = c->datalen;
172 state->call->status = c->status;
174 talloc_steal(state, c);
176 state->state = CTDB_CALL_DONE;
178 if (state->async.fn) {
179 state->async.fn(state);
183 static void ctdb_client_reply_control(struct ctdb_context *ctdb, struct ctdb_req_header *hdr);
186 this is called in the client, when data comes in from the daemon
188 void ctdb_client_read_cb(uint8_t *data, size_t cnt, void *args)
190 struct ctdb_context *ctdb = talloc_get_type(args, struct ctdb_context);
191 struct ctdb_req_header *hdr = (struct ctdb_req_header *)data;
192 TALLOC_CTX *tmp_ctx;
194 /* place the packet as a child of a tmp_ctx. We then use
195 talloc_free() below to free it. If any of the calls want
196 to keep it, then they will steal it somewhere else, and the
197 talloc_free() will be a no-op */
198 tmp_ctx = talloc_new(ctdb);
199 talloc_steal(tmp_ctx, hdr);
201 if (cnt == 0) {
202 DEBUG(DEBUG_CRIT,("Daemon has exited - shutting down client\n"));
203 exit(1);
206 if (cnt < sizeof(*hdr)) {
207 DEBUG(DEBUG_CRIT,("Bad packet length %u in client\n", (unsigned)cnt));
208 goto done;
210 if (cnt != hdr->length) {
211 ctdb_set_error(ctdb, "Bad header length %u expected %u in client\n",
212 (unsigned)hdr->length, (unsigned)cnt);
213 goto done;
216 if (hdr->ctdb_magic != CTDB_MAGIC) {
217 ctdb_set_error(ctdb, "Non CTDB packet rejected in client\n");
218 goto done;
221 if (hdr->ctdb_version != CTDB_VERSION) {
222 ctdb_set_error(ctdb, "Bad CTDB version 0x%x rejected in client\n", hdr->ctdb_version);
223 goto done;
226 switch (hdr->operation) {
227 case CTDB_REPLY_CALL:
228 ctdb_client_reply_call(ctdb, hdr);
229 break;
231 case CTDB_REQ_MESSAGE:
232 ctdb_request_message(ctdb, hdr);
233 break;
235 case CTDB_REPLY_CONTROL:
236 ctdb_client_reply_control(ctdb, hdr);
237 break;
239 default:
240 DEBUG(DEBUG_CRIT,("bogus operation code:%u\n",hdr->operation));
243 done:
244 talloc_free(tmp_ctx);
248 connect to a unix domain socket
250 int ctdb_socket_connect(struct ctdb_context *ctdb)
252 struct sockaddr_un addr;
254 memset(&addr, 0, sizeof(addr));
255 addr.sun_family = AF_UNIX;
256 strncpy(addr.sun_path, ctdb->daemon.name, sizeof(addr.sun_path)-1);
258 ctdb->daemon.sd = socket(AF_UNIX, SOCK_STREAM, 0);
259 if (ctdb->daemon.sd == -1) {
260 DEBUG(DEBUG_ERR,(__location__ " Failed to open client socket. Errno:%s(%d)\n", strerror(errno), errno));
261 return -1;
264 if (connect(ctdb->daemon.sd, (struct sockaddr *)&addr, sizeof(addr)) == -1) {
265 close(ctdb->daemon.sd);
266 ctdb->daemon.sd = -1;
267 DEBUG(DEBUG_ERR,(__location__ " Failed to connect client socket to daemon. Errno:%s(%d)\n", strerror(errno), errno));
268 return -1;
271 set_nonblocking(ctdb->daemon.sd);
272 set_close_on_exec(ctdb->daemon.sd);
274 ctdb->daemon.queue = ctdb_queue_setup(ctdb, ctdb, ctdb->daemon.sd,
275 CTDB_DS_ALIGNMENT,
276 ctdb_client_read_cb, ctdb, "to-ctdbd");
277 return 0;
281 struct ctdb_record_handle {
282 struct ctdb_db_context *ctdb_db;
283 TDB_DATA key;
284 TDB_DATA *data;
285 struct ctdb_ltdb_header header;
290 make a recv call to the local ctdb daemon - called from client context
292 This is called when the program wants to wait for a ctdb_call to complete and get the
293 results. This call will block unless the call has already completed.
295 int ctdb_call_recv(struct ctdb_client_call_state *state, struct ctdb_call *call)
297 if (state == NULL) {
298 return -1;
301 while (state->state < CTDB_CALL_DONE) {
302 event_loop_once(state->ctdb_db->ctdb->ev);
304 if (state->state != CTDB_CALL_DONE) {
305 DEBUG(DEBUG_ERR,(__location__ " ctdb_call_recv failed\n"));
306 talloc_free(state);
307 return -1;
310 if (state->call->reply_data.dsize) {
311 call->reply_data.dptr = talloc_memdup(state->ctdb_db,
312 state->call->reply_data.dptr,
313 state->call->reply_data.dsize);
314 call->reply_data.dsize = state->call->reply_data.dsize;
315 } else {
316 call->reply_data.dptr = NULL;
317 call->reply_data.dsize = 0;
319 call->status = state->call->status;
320 talloc_free(state);
322 return call->status;
329 destroy a ctdb_call in client
331 static int ctdb_client_call_destructor(struct ctdb_client_call_state *state)
333 ctdb_reqid_remove(state->ctdb_db->ctdb, state->reqid);
334 return 0;
338 construct an event driven local ctdb_call
340 this is used so that locally processed ctdb_call requests are processed
341 in an event driven manner
343 static struct ctdb_client_call_state *ctdb_client_call_local_send(struct ctdb_db_context *ctdb_db,
344 struct ctdb_call *call,
345 struct ctdb_ltdb_header *header,
346 TDB_DATA *data)
348 struct ctdb_client_call_state *state;
349 struct ctdb_context *ctdb = ctdb_db->ctdb;
350 int ret;
352 state = talloc_zero(ctdb_db, struct ctdb_client_call_state);
353 CTDB_NO_MEMORY_NULL(ctdb, state);
354 state->call = talloc_zero(state, struct ctdb_call);
355 CTDB_NO_MEMORY_NULL(ctdb, state->call);
357 talloc_steal(state, data->dptr);
359 state->state = CTDB_CALL_DONE;
360 *(state->call) = *call;
361 state->ctdb_db = ctdb_db;
363 ret = ctdb_call_local(ctdb_db, state->call, header, state, data, true);
364 if (ret != 0) {
365 DEBUG(DEBUG_DEBUG,("ctdb_call_local() failed, ignoring return code %d\n", ret));
368 return state;
372 make a ctdb call to the local daemon - async send. Called from client context.
374 This constructs a ctdb_call request and queues it for processing.
375 This call never blocks.
377 struct ctdb_client_call_state *ctdb_call_send(struct ctdb_db_context *ctdb_db,
378 struct ctdb_call *call)
380 struct ctdb_client_call_state *state;
381 struct ctdb_context *ctdb = ctdb_db->ctdb;
382 struct ctdb_ltdb_header header;
383 TDB_DATA data;
384 int ret;
385 size_t len;
386 struct ctdb_req_call *c;
388 /* if the domain socket is not yet open, open it */
389 if (ctdb->daemon.sd==-1) {
390 ctdb_socket_connect(ctdb);
393 ret = ctdb_ltdb_lock(ctdb_db, call->key);
394 if (ret != 0) {
395 DEBUG(DEBUG_ERR,(__location__ " Failed to get chainlock\n"));
396 return NULL;
399 ret = ctdb_ltdb_fetch(ctdb_db, call->key, &header, ctdb_db, &data);
401 if ((call->flags & CTDB_IMMEDIATE_MIGRATION) && (header.flags & CTDB_REC_RO_HAVE_DELEGATIONS)) {
402 ret = -1;
405 if (ret == 0 && header.dmaster == ctdb->pnn) {
406 state = ctdb_client_call_local_send(ctdb_db, call, &header, &data);
407 talloc_free(data.dptr);
408 ctdb_ltdb_unlock(ctdb_db, call->key);
409 return state;
412 ctdb_ltdb_unlock(ctdb_db, call->key);
413 talloc_free(data.dptr);
415 state = talloc_zero(ctdb_db, struct ctdb_client_call_state);
416 if (state == NULL) {
417 DEBUG(DEBUG_ERR, (__location__ " failed to allocate state\n"));
418 return NULL;
420 state->call = talloc_zero(state, struct ctdb_call);
421 if (state->call == NULL) {
422 DEBUG(DEBUG_ERR, (__location__ " failed to allocate state->call\n"));
423 return NULL;
426 len = offsetof(struct ctdb_req_call, data) + call->key.dsize + call->call_data.dsize;
427 c = ctdbd_allocate_pkt(ctdb, state, CTDB_REQ_CALL, len, struct ctdb_req_call);
428 if (c == NULL) {
429 DEBUG(DEBUG_ERR, (__location__ " failed to allocate packet\n"));
430 return NULL;
433 state->reqid = ctdb_reqid_new(ctdb, state);
434 state->ctdb_db = ctdb_db;
435 talloc_set_destructor(state, ctdb_client_call_destructor);
437 c->hdr.reqid = state->reqid;
438 c->flags = call->flags;
439 c->db_id = ctdb_db->db_id;
440 c->callid = call->call_id;
441 c->hopcount = 0;
442 c->keylen = call->key.dsize;
443 c->calldatalen = call->call_data.dsize;
444 memcpy(&c->data[0], call->key.dptr, call->key.dsize);
445 memcpy(&c->data[call->key.dsize],
446 call->call_data.dptr, call->call_data.dsize);
447 *(state->call) = *call;
448 state->call->call_data.dptr = &c->data[call->key.dsize];
449 state->call->key.dptr = &c->data[0];
451 state->state = CTDB_CALL_WAIT;
454 ctdb_client_queue_pkt(ctdb, &c->hdr);
456 return state;
/*
  synchronous ctdb_call: ctdb_call_send() immediately followed by
  ctdb_call_recv() on the returned state
 */
int ctdb_call(struct ctdb_db_context *ctdb_db, struct ctdb_call *call)
{
	return ctdb_call_recv(ctdb_call_send(ctdb_db, call), call);
}
473 tell the daemon what messaging srvid we will use, and register the message
474 handler function in the client
476 int ctdb_client_set_message_handler(struct ctdb_context *ctdb, uint64_t srvid,
477 ctdb_msg_fn_t handler,
478 void *private_data)
480 int res;
481 int32_t status;
483 res = ctdb_control(ctdb, CTDB_CURRENT_NODE, srvid, CTDB_CONTROL_REGISTER_SRVID, 0,
484 tdb_null, NULL, NULL, &status, NULL, NULL);
485 if (res != 0 || status != 0) {
486 DEBUG(DEBUG_ERR,("Failed to register srvid %llu\n", (unsigned long long)srvid));
487 return -1;
490 /* also need to register the handler with our own ctdb structure */
491 return ctdb_register_message_handler(ctdb, ctdb, srvid, handler, private_data);
495 tell the daemon we no longer want a srvid
497 int ctdb_client_remove_message_handler(struct ctdb_context *ctdb, uint64_t srvid, void *private_data)
499 int res;
500 int32_t status;
502 res = ctdb_control(ctdb, CTDB_CURRENT_NODE, srvid, CTDB_CONTROL_DEREGISTER_SRVID, 0,
503 tdb_null, NULL, NULL, &status, NULL, NULL);
504 if (res != 0 || status != 0) {
505 DEBUG(DEBUG_ERR,("Failed to deregister srvid %llu\n", (unsigned long long)srvid));
506 return -1;
509 /* also need to register the handler with our own ctdb structure */
510 ctdb_deregister_message_handler(ctdb, srvid, private_data);
511 return 0;
515 * check server ids
517 int ctdb_client_check_message_handlers(struct ctdb_context *ctdb, uint64_t *ids, uint32_t num,
518 uint8_t *result)
520 TDB_DATA indata, outdata;
521 int res;
522 int32_t status;
523 int i;
525 indata.dptr = (uint8_t *)ids;
526 indata.dsize = num * sizeof(*ids);
528 res = ctdb_control(ctdb, CTDB_CURRENT_NODE, 0, CTDB_CONTROL_CHECK_SRVIDS, 0,
529 indata, ctdb, &outdata, &status, NULL, NULL);
530 if (res != 0 || status != 0) {
531 DEBUG(DEBUG_ERR, (__location__ " failed to check srvids\n"));
532 return -1;
535 if (outdata.dsize != num*sizeof(uint8_t)) {
536 DEBUG(DEBUG_ERR, (__location__ " expected %lu bytes, received %zi bytes\n",
537 (long unsigned int)num*sizeof(uint8_t),
538 outdata.dsize));
539 talloc_free(outdata.dptr);
540 return -1;
543 for (i=0; i<num; i++) {
544 result[i] = outdata.dptr[i];
547 talloc_free(outdata.dptr);
548 return 0;
552 send a message - from client context
554 int ctdb_client_send_message(struct ctdb_context *ctdb, uint32_t pnn,
555 uint64_t srvid, TDB_DATA data)
557 struct ctdb_req_message *r;
558 int len, res;
560 len = offsetof(struct ctdb_req_message, data) + data.dsize;
561 r = ctdbd_allocate_pkt(ctdb, ctdb, CTDB_REQ_MESSAGE,
562 len, struct ctdb_req_message);
563 CTDB_NO_MEMORY(ctdb, r);
565 r->hdr.destnode = pnn;
566 r->srvid = srvid;
567 r->datalen = data.dsize;
568 memcpy(&r->data[0], data.dptr, data.dsize);
570 res = ctdb_client_queue_pkt(ctdb, &r->hdr);
571 talloc_free(r);
572 return res;
577 cancel a ctdb_fetch_lock operation, releasing the lock
579 static int fetch_lock_destructor(struct ctdb_record_handle *h)
581 ctdb_ltdb_unlock(h->ctdb_db, h->key);
582 return 0;
586 force the migration of a record to this node
588 static int ctdb_client_force_migration(struct ctdb_db_context *ctdb_db, TDB_DATA key)
590 struct ctdb_call call;
591 ZERO_STRUCT(call);
592 call.call_id = CTDB_NULL_FUNC;
593 call.key = key;
594 call.flags = CTDB_IMMEDIATE_MIGRATION;
595 return ctdb_call(ctdb_db, &call);
599 try to fetch a readonly copy of a record
601 static int
602 ctdb_client_fetch_readonly(struct ctdb_db_context *ctdb_db, TDB_DATA key, TALLOC_CTX *mem_ctx, struct ctdb_ltdb_header **hdr, TDB_DATA *data)
604 int ret;
606 struct ctdb_call call;
607 ZERO_STRUCT(call);
609 call.call_id = CTDB_FETCH_WITH_HEADER_FUNC;
610 call.call_data.dptr = NULL;
611 call.call_data.dsize = 0;
612 call.key = key;
613 call.flags = CTDB_WANT_READONLY;
614 ret = ctdb_call(ctdb_db, &call);
616 if (ret != 0) {
617 return -1;
619 if (call.reply_data.dsize < sizeof(struct ctdb_ltdb_header)) {
620 return -1;
623 *hdr = talloc_memdup(mem_ctx, &call.reply_data.dptr[0], sizeof(struct ctdb_ltdb_header));
624 if (*hdr == NULL) {
625 talloc_free(call.reply_data.dptr);
626 return -1;
629 data->dsize = call.reply_data.dsize - sizeof(struct ctdb_ltdb_header);
630 data->dptr = talloc_memdup(mem_ctx, &call.reply_data.dptr[sizeof(struct ctdb_ltdb_header)], data->dsize);
631 if (data->dptr == NULL) {
632 talloc_free(call.reply_data.dptr);
633 talloc_free(hdr);
634 return -1;
637 return 0;
641 get a lock on a record, and return the records data. Blocks until it gets the lock
643 struct ctdb_record_handle *ctdb_fetch_lock(struct ctdb_db_context *ctdb_db, TALLOC_CTX *mem_ctx,
644 TDB_DATA key, TDB_DATA *data)
646 int ret;
647 struct ctdb_record_handle *h;
650 procedure is as follows:
652 1) get the chain lock.
653 2) check if we are dmaster
654 3) if we are the dmaster then return handle
655 4) if not dmaster then ask ctdb daemon to make us dmaster, and wait for
656 reply from ctdbd
657 5) when we get the reply, goto (1)
660 h = talloc_zero(mem_ctx, struct ctdb_record_handle);
661 if (h == NULL) {
662 return NULL;
665 h->ctdb_db = ctdb_db;
666 h->key = key;
667 h->key.dptr = talloc_memdup(h, key.dptr, key.dsize);
668 if (h->key.dptr == NULL) {
669 talloc_free(h);
670 return NULL;
672 h->data = data;
674 DEBUG(DEBUG_DEBUG,("ctdb_fetch_lock: key=%*.*s\n", (int)key.dsize, (int)key.dsize,
675 (const char *)key.dptr));
677 again:
678 /* step 1 - get the chain lock */
679 ret = ctdb_ltdb_lock(ctdb_db, key);
680 if (ret != 0) {
681 DEBUG(DEBUG_ERR, (__location__ " failed to lock ltdb record\n"));
682 talloc_free(h);
683 return NULL;
686 DEBUG(DEBUG_DEBUG,("ctdb_fetch_lock: got chain lock\n"));
688 talloc_set_destructor(h, fetch_lock_destructor);
690 ret = ctdb_ltdb_fetch(ctdb_db, key, &h->header, h, data);
692 /* when torturing, ensure we test the remote path */
693 if ((ctdb_db->ctdb->flags & CTDB_FLAG_TORTURE) &&
694 random() % 5 == 0) {
695 h->header.dmaster = (uint32_t)-1;
699 DEBUG(DEBUG_DEBUG,("ctdb_fetch_lock: done local fetch\n"));
701 if (ret != 0 || h->header.dmaster != ctdb_db->ctdb->pnn) {
702 ctdb_ltdb_unlock(ctdb_db, key);
703 ret = ctdb_client_force_migration(ctdb_db, key);
704 if (ret != 0) {
705 DEBUG(DEBUG_DEBUG,("ctdb_fetch_lock: force_migration failed\n"));
706 talloc_free(h);
707 return NULL;
709 goto again;
712 /* if this is a request for read/write and we have delegations
713 we have to revoke all delegations first
715 if ((h->header.dmaster == ctdb_db->ctdb->pnn) &&
716 (h->header.flags & CTDB_REC_RO_HAVE_DELEGATIONS)) {
717 ctdb_ltdb_unlock(ctdb_db, key);
718 ret = ctdb_client_force_migration(ctdb_db, key);
719 if (ret != 0) {
720 DEBUG(DEBUG_DEBUG,("ctdb_fetch_readonly_lock: force_migration failed\n"));
721 talloc_free(h);
722 return NULL;
724 goto again;
727 DEBUG(DEBUG_DEBUG,("ctdb_fetch_lock: we are dmaster - done\n"));
728 return h;
732 get a readonly lock on a record, and return the records data. Blocks until it gets the lock
734 struct ctdb_record_handle *
735 ctdb_fetch_readonly_lock(
736 struct ctdb_db_context *ctdb_db, TALLOC_CTX *mem_ctx,
737 TDB_DATA key, TDB_DATA *data,
738 int read_only)
740 int ret;
741 struct ctdb_record_handle *h;
742 struct ctdb_ltdb_header *roheader = NULL;
744 h = talloc_zero(mem_ctx, struct ctdb_record_handle);
745 if (h == NULL) {
746 return NULL;
749 h->ctdb_db = ctdb_db;
750 h->key = key;
751 h->key.dptr = talloc_memdup(h, key.dptr, key.dsize);
752 if (h->key.dptr == NULL) {
753 talloc_free(h);
754 return NULL;
756 h->data = data;
758 data->dptr = NULL;
759 data->dsize = 0;
762 again:
763 talloc_free(roheader);
764 roheader = NULL;
766 talloc_free(data->dptr);
767 data->dptr = NULL;
768 data->dsize = 0;
770 /* Lock the record/chain */
771 ret = ctdb_ltdb_lock(ctdb_db, key);
772 if (ret != 0) {
773 DEBUG(DEBUG_ERR, (__location__ " failed to lock ltdb record\n"));
774 talloc_free(h);
775 return NULL;
778 talloc_set_destructor(h, fetch_lock_destructor);
780 /* Check if record exists yet in the TDB */
781 ret = ctdb_ltdb_fetch_with_header(ctdb_db, key, &h->header, h, data);
782 if (ret != 0) {
783 ctdb_ltdb_unlock(ctdb_db, key);
784 ret = ctdb_client_force_migration(ctdb_db, key);
785 if (ret != 0) {
786 DEBUG(DEBUG_DEBUG,("ctdb_fetch_readonly_lock: force_migration failed\n"));
787 talloc_free(h);
788 return NULL;
790 goto again;
793 /* if this is a request for read/write and we have delegations
794 we have to revoke all delegations first
796 if ((read_only == 0)
797 && (h->header.dmaster == ctdb_db->ctdb->pnn)
798 && (h->header.flags & CTDB_REC_RO_HAVE_DELEGATIONS)) {
799 ctdb_ltdb_unlock(ctdb_db, key);
800 ret = ctdb_client_force_migration(ctdb_db, key);
801 if (ret != 0) {
802 DEBUG(DEBUG_DEBUG,("ctdb_fetch_readonly_lock: force_migration failed\n"));
803 talloc_free(h);
804 return NULL;
806 goto again;
809 /* if we are dmaster, just return the handle */
810 if (h->header.dmaster == ctdb_db->ctdb->pnn) {
811 return h;
814 if (read_only != 0) {
815 TDB_DATA rodata = {NULL, 0};
817 if ((h->header.flags & CTDB_REC_RO_HAVE_READONLY)
818 || (h->header.flags & CTDB_REC_RO_HAVE_DELEGATIONS)) {
819 return h;
822 ctdb_ltdb_unlock(ctdb_db, key);
823 ret = ctdb_client_fetch_readonly(ctdb_db, key, h, &roheader, &rodata);
824 if (ret != 0) {
825 DEBUG(DEBUG_ERR,("ctdb_fetch_readonly_lock: failed. force migration and try again\n"));
826 ret = ctdb_client_force_migration(ctdb_db, key);
827 if (ret != 0) {
828 DEBUG(DEBUG_DEBUG,("ctdb_fetch_readonly_lock: force_migration failed\n"));
829 talloc_free(h);
830 return NULL;
833 goto again;
836 if (!(roheader->flags&CTDB_REC_RO_HAVE_READONLY)) {
837 ret = ctdb_client_force_migration(ctdb_db, key);
838 if (ret != 0) {
839 DEBUG(DEBUG_DEBUG,("ctdb_fetch_readonly_lock: force_migration failed\n"));
840 talloc_free(h);
841 return NULL;
844 goto again;
847 ret = ctdb_ltdb_lock(ctdb_db, key);
848 if (ret != 0) {
849 DEBUG(DEBUG_ERR, (__location__ " failed to lock ltdb record\n"));
850 talloc_free(h);
851 return NULL;
854 ret = ctdb_ltdb_fetch_with_header(ctdb_db, key, &h->header, h, data);
855 if (ret != 0) {
856 ctdb_ltdb_unlock(ctdb_db, key);
858 ret = ctdb_client_force_migration(ctdb_db, key);
859 if (ret != 0) {
860 DEBUG(DEBUG_DEBUG,("ctdb_fetch_readonly_lock: force_migration failed\n"));
861 talloc_free(h);
862 return NULL;
865 goto again;
868 return h;
871 /* we are not dmaster and this was not a request for a readonly lock
872 * so unlock the record, migrate it and try again
874 ctdb_ltdb_unlock(ctdb_db, key);
875 ret = ctdb_client_force_migration(ctdb_db, key);
876 if (ret != 0) {
877 DEBUG(DEBUG_DEBUG,("ctdb_fetch_lock: force_migration failed\n"));
878 talloc_free(h);
879 return NULL;
881 goto again;
885 store some data to the record that was locked with ctdb_fetch_lock()
887 int ctdb_record_store(struct ctdb_record_handle *h, TDB_DATA data)
889 if (h->ctdb_db->persistent) {
890 DEBUG(DEBUG_ERR, (__location__ " ctdb_record_store prohibited for persistent dbs\n"));
891 return -1;
894 return ctdb_ltdb_store(h->ctdb_db, h->key, &h->header, data);
898 non-locking fetch of a record
900 int ctdb_fetch(struct ctdb_db_context *ctdb_db, TALLOC_CTX *mem_ctx,
901 TDB_DATA key, TDB_DATA *data)
903 struct ctdb_call call;
904 int ret;
906 call.call_id = CTDB_FETCH_FUNC;
907 call.call_data.dptr = NULL;
908 call.call_data.dsize = 0;
909 call.key = key;
911 ret = ctdb_call(ctdb_db, &call);
913 if (ret == 0) {
914 *data = call.reply_data;
915 talloc_steal(mem_ctx, data->dptr);
918 return ret;
924 called when a control completes or timesout to invoke the callback
925 function the user provided
927 static void invoke_control_callback(struct event_context *ev, struct timed_event *te,
928 struct timeval t, void *private_data)
930 struct ctdb_client_control_state *state;
931 TALLOC_CTX *tmp_ctx = talloc_new(NULL);
932 int ret;
934 state = talloc_get_type(private_data, struct ctdb_client_control_state);
935 talloc_steal(tmp_ctx, state);
937 ret = ctdb_control_recv(state->ctdb, state, state,
938 NULL,
939 NULL,
940 NULL);
941 if (ret != 0) {
942 DEBUG(DEBUG_DEBUG,("ctdb_control_recv() failed, ignoring return code %d\n", ret));
945 talloc_free(tmp_ctx);
949 called when a CTDB_REPLY_CONTROL packet comes in in the client
951 This packet comes in response to a CTDB_REQ_CONTROL request packet. It
952 contains any reply data from the control
954 static void ctdb_client_reply_control(struct ctdb_context *ctdb,
955 struct ctdb_req_header *hdr)
957 struct ctdb_reply_control *c = (struct ctdb_reply_control *)hdr;
958 struct ctdb_client_control_state *state;
960 state = ctdb_reqid_find(ctdb, hdr->reqid, struct ctdb_client_control_state);
961 if (state == NULL) {
962 DEBUG(DEBUG_ERR,(__location__ " reqid %u not found\n", hdr->reqid));
963 return;
966 if (hdr->reqid != state->reqid) {
967 /* we found a record but it was the wrong one */
968 DEBUG(DEBUG_ERR, ("Dropped orphaned reply control with reqid:%u\n",hdr->reqid));
969 return;
972 state->outdata.dptr = c->data;
973 state->outdata.dsize = c->datalen;
974 state->status = c->status;
975 if (c->errorlen) {
976 state->errormsg = talloc_strndup(state,
977 (char *)&c->data[c->datalen],
978 c->errorlen);
981 /* state->outdata now uses resources from c so we dont want c
982 to just dissappear from under us while state is still alive
984 talloc_steal(state, c);
986 state->state = CTDB_CONTROL_DONE;
988 /* if we had a callback registered for this control, pull the response
989 and call the callback.
991 if (state->async.fn) {
992 event_add_timed(ctdb->ev, state, timeval_zero(), invoke_control_callback, state);
998 destroy a ctdb_control in client
1000 static int ctdb_client_control_destructor(struct ctdb_client_control_state *state)
1002 ctdb_reqid_remove(state->ctdb, state->reqid);
1003 return 0;
1007 /* time out handler for ctdb_control */
1008 static void control_timeout_func(struct event_context *ev, struct timed_event *te,
1009 struct timeval t, void *private_data)
1011 struct ctdb_client_control_state *state = talloc_get_type(private_data, struct ctdb_client_control_state);
1013 DEBUG(DEBUG_ERR,(__location__ " control timed out. reqid:%u opcode:%u "
1014 "dstnode:%u\n", state->reqid, state->c->opcode,
1015 state->c->hdr.destnode));
1017 state->state = CTDB_CONTROL_TIMEOUT;
1019 /* if we had a callback registered for this control, pull the response
1020 and call the callback.
1022 if (state->async.fn) {
1023 event_add_timed(state->ctdb->ev, state, timeval_zero(), invoke_control_callback, state);
1027 /* async version of send control request */
1028 struct ctdb_client_control_state *ctdb_control_send(struct ctdb_context *ctdb,
1029 uint32_t destnode, uint64_t srvid,
1030 uint32_t opcode, uint32_t flags, TDB_DATA data,
1031 TALLOC_CTX *mem_ctx,
1032 struct timeval *timeout,
1033 char **errormsg)
1035 struct ctdb_client_control_state *state;
1036 size_t len;
1037 struct ctdb_req_control *c;
1038 int ret;
1040 if (errormsg) {
1041 *errormsg = NULL;
1044 /* if the domain socket is not yet open, open it */
1045 if (ctdb->daemon.sd==-1) {
1046 ctdb_socket_connect(ctdb);
1049 state = talloc_zero(mem_ctx, struct ctdb_client_control_state);
1050 CTDB_NO_MEMORY_NULL(ctdb, state);
1052 state->ctdb = ctdb;
1053 state->reqid = ctdb_reqid_new(ctdb, state);
1054 state->state = CTDB_CONTROL_WAIT;
1055 state->errormsg = NULL;
1057 talloc_set_destructor(state, ctdb_client_control_destructor);
1059 len = offsetof(struct ctdb_req_control, data) + data.dsize;
1060 c = ctdbd_allocate_pkt(ctdb, state, CTDB_REQ_CONTROL,
1061 len, struct ctdb_req_control);
1062 state->c = c;
1063 CTDB_NO_MEMORY_NULL(ctdb, c);
1064 c->hdr.reqid = state->reqid;
1065 c->hdr.destnode = destnode;
1066 c->opcode = opcode;
1067 c->client_id = 0;
1068 c->flags = flags;
1069 c->srvid = srvid;
1070 c->datalen = data.dsize;
1071 if (data.dsize) {
1072 memcpy(&c->data[0], data.dptr, data.dsize);
1075 /* timeout */
1076 if (timeout && !timeval_is_zero(timeout)) {
1077 event_add_timed(ctdb->ev, state, *timeout, control_timeout_func, state);
1080 ret = ctdb_client_queue_pkt(ctdb, &(c->hdr));
1081 if (ret != 0) {
1082 talloc_free(state);
1083 return NULL;
1086 if (flags & CTDB_CTRL_FLAG_NOREPLY) {
1087 talloc_free(state);
1088 return NULL;
1091 return state;
1095 /* async version of receive control reply */
1096 int ctdb_control_recv(struct ctdb_context *ctdb,
1097 struct ctdb_client_control_state *state,
1098 TALLOC_CTX *mem_ctx,
1099 TDB_DATA *outdata, int32_t *status, char **errormsg)
1101 TALLOC_CTX *tmp_ctx;
1103 if (status != NULL) {
1104 *status = -1;
1106 if (errormsg != NULL) {
1107 *errormsg = NULL;
1110 if (state == NULL) {
1111 return -1;
1114 /* prevent double free of state */
1115 tmp_ctx = talloc_new(ctdb);
1116 talloc_steal(tmp_ctx, state);
1118 /* loop one event at a time until we either timeout or the control
1119 completes.
1121 while (state->state == CTDB_CONTROL_WAIT) {
1122 event_loop_once(ctdb->ev);
1125 if (state->state != CTDB_CONTROL_DONE) {
1126 DEBUG(DEBUG_ERR,(__location__ " ctdb_control_recv failed\n"));
1127 if (state->async.fn) {
1128 state->async.fn(state);
1130 talloc_free(tmp_ctx);
1131 return -1;
1134 if (state->errormsg) {
1135 DEBUG(DEBUG_ERR,("ctdb_control error: '%s'\n", state->errormsg));
1136 if (errormsg) {
1137 (*errormsg) = talloc_move(mem_ctx, &state->errormsg);
1139 if (state->async.fn) {
1140 state->async.fn(state);
1142 talloc_free(tmp_ctx);
1143 return -1;
1146 if (outdata) {
1147 *outdata = state->outdata;
1148 outdata->dptr = talloc_memdup(mem_ctx, outdata->dptr, outdata->dsize);
1151 if (status) {
1152 *status = state->status;
1155 if (state->async.fn) {
1156 state->async.fn(state);
1159 talloc_free(tmp_ctx);
1160 return 0;
1166 send a ctdb control message
1167 timeout specifies how long we should wait for a reply.
1168 if timeout is NULL we wait indefinitely
1170 int ctdb_control(struct ctdb_context *ctdb, uint32_t destnode, uint64_t srvid,
1171 uint32_t opcode, uint32_t flags, TDB_DATA data,
1172 TALLOC_CTX *mem_ctx, TDB_DATA *outdata, int32_t *status,
1173 struct timeval *timeout,
1174 char **errormsg)
1176 struct ctdb_client_control_state *state;
1178 state = ctdb_control_send(ctdb, destnode, srvid, opcode,
1179 flags, data, mem_ctx,
1180 timeout, errormsg);
1182 /* FIXME: Error conditions in ctdb_control_send return NULL without
1183 * setting errormsg. So, there is no way to distinguish between sucess
1184 * and failure when CTDB_CTRL_FLAG_NOREPLY is set */
1185 if (flags & CTDB_CTRL_FLAG_NOREPLY) {
1186 if (status != NULL) {
1187 *status = 0;
1189 return 0;
1192 return ctdb_control_recv(ctdb, state, mem_ctx, outdata, status,
1193 errormsg);
1200 a process exists call. Returns 0 if process exists, -1 otherwise
1202 int ctdb_ctrl_process_exists(struct ctdb_context *ctdb, uint32_t destnode, pid_t pid)
1204 int ret;
1205 TDB_DATA data;
1206 int32_t status;
1208 data.dptr = (uint8_t*)&pid;
1209 data.dsize = sizeof(pid);
1211 ret = ctdb_control(ctdb, destnode, 0,
1212 CTDB_CONTROL_PROCESS_EXISTS, 0, data,
1213 NULL, NULL, &status, NULL, NULL);
1214 if (ret != 0) {
1215 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for process_exists failed\n"));
1216 return -1;
1219 return status;
1223 get remote statistics
1225 int ctdb_ctrl_statistics(struct ctdb_context *ctdb, uint32_t destnode, struct ctdb_statistics *status)
1227 int ret;
1228 TDB_DATA data;
1229 int32_t res;
1231 ret = ctdb_control(ctdb, destnode, 0,
1232 CTDB_CONTROL_STATISTICS, 0, tdb_null,
1233 ctdb, &data, &res, NULL, NULL);
1234 if (ret != 0 || res != 0) {
1235 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for statistics failed\n"));
1236 return -1;
1239 if (data.dsize != sizeof(struct ctdb_statistics)) {
1240 DEBUG(DEBUG_ERR,(__location__ " Wrong statistics size %u - expected %u\n",
1241 (unsigned)data.dsize, (unsigned)sizeof(struct ctdb_statistics)));
1242 return -1;
1245 *status = *(struct ctdb_statistics *)data.dptr;
1246 talloc_free(data.dptr);
1248 return 0;
1252 * get db statistics
1254 int ctdb_ctrl_dbstatistics(struct ctdb_context *ctdb, uint32_t destnode, uint32_t dbid,
1255 TALLOC_CTX *mem_ctx, struct ctdb_db_statistics **dbstat)
1257 int ret;
1258 TDB_DATA indata, outdata;
1259 int32_t res;
1260 struct ctdb_db_statistics *wire, *s;
1261 char *ptr;
1262 int i;
1264 indata.dptr = (uint8_t *)&dbid;
1265 indata.dsize = sizeof(dbid);
1267 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_GET_DB_STATISTICS,
1268 0, indata, ctdb, &outdata, &res, NULL, NULL);
1269 if (ret != 0 || res != 0) {
1270 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for dbstatistics failed\n"));
1271 return -1;
1274 if (outdata.dsize < offsetof(struct ctdb_db_statistics, hot_keys_wire)) {
1275 DEBUG(DEBUG_ERR,(__location__ " Wrong dbstatistics size %zi - expected >= %lu\n",
1276 outdata.dsize,
1277 (long unsigned int)sizeof(struct ctdb_statistics)));
1278 return -1;
1281 s = talloc_zero(mem_ctx, struct ctdb_db_statistics);
1282 if (s == NULL) {
1283 talloc_free(outdata.dptr);
1284 CTDB_NO_MEMORY(ctdb, s);
1287 wire = (struct ctdb_db_statistics *)outdata.dptr;
1288 *s = *wire;
1289 ptr = &wire->hot_keys_wire[0];
1290 for (i=0; i<wire->num_hot_keys; i++) {
1291 s->hot_keys[i].key.dptr = talloc_size(mem_ctx, s->hot_keys[i].key.dsize);
1292 if (s->hot_keys[i].key.dptr == NULL) {
1293 talloc_free(outdata.dptr);
1294 CTDB_NO_MEMORY(ctdb, s->hot_keys[i].key.dptr);
1297 memcpy(s->hot_keys[i].key.dptr, ptr, s->hot_keys[i].key.dsize);
1298 ptr += wire->hot_keys[i].key.dsize;
1301 talloc_free(outdata.dptr);
1302 *dbstat = s;
1303 return 0;
1307 shutdown a remote ctdb node
1309 int ctdb_ctrl_shutdown(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode)
1311 struct ctdb_client_control_state *state;
1313 state = ctdb_control_send(ctdb, destnode, 0,
1314 CTDB_CONTROL_SHUTDOWN, 0, tdb_null,
1315 NULL, &timeout, NULL);
1316 if (state == NULL) {
1317 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for shutdown failed\n"));
1318 return -1;
1321 return 0;
1325 get vnn map from a remote node
1327 int ctdb_ctrl_getvnnmap(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, TALLOC_CTX *mem_ctx, struct ctdb_vnn_map **vnnmap)
1329 int ret;
1330 TDB_DATA outdata;
1331 int32_t res;
1332 struct ctdb_vnn_map_wire *map;
1334 ret = ctdb_control(ctdb, destnode, 0,
1335 CTDB_CONTROL_GETVNNMAP, 0, tdb_null,
1336 mem_ctx, &outdata, &res, &timeout, NULL);
1337 if (ret != 0 || res != 0) {
1338 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getvnnmap failed\n"));
1339 return -1;
1342 map = (struct ctdb_vnn_map_wire *)outdata.dptr;
1343 if (outdata.dsize < offsetof(struct ctdb_vnn_map_wire, map) ||
1344 outdata.dsize != map->size*sizeof(uint32_t) + offsetof(struct ctdb_vnn_map_wire, map)) {
1345 DEBUG(DEBUG_ERR,("Bad vnn map size received in ctdb_ctrl_getvnnmap\n"));
1346 return -1;
1349 (*vnnmap) = talloc(mem_ctx, struct ctdb_vnn_map);
1350 CTDB_NO_MEMORY(ctdb, *vnnmap);
1351 (*vnnmap)->generation = map->generation;
1352 (*vnnmap)->size = map->size;
1353 (*vnnmap)->map = talloc_array(*vnnmap, uint32_t, map->size);
1355 CTDB_NO_MEMORY(ctdb, (*vnnmap)->map);
1356 memcpy((*vnnmap)->map, map->map, sizeof(uint32_t)*map->size);
1357 talloc_free(outdata.dptr);
1359 return 0;
1364 get the recovery mode of a remote node
1366 struct ctdb_client_control_state *
1367 ctdb_ctrl_getrecmode_send(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct timeval timeout, uint32_t destnode)
1369 return ctdb_control_send(ctdb, destnode, 0,
1370 CTDB_CONTROL_GET_RECMODE, 0, tdb_null,
1371 mem_ctx, &timeout, NULL);
1374 int ctdb_ctrl_getrecmode_recv(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct ctdb_client_control_state *state, uint32_t *recmode)
1376 int ret;
1377 int32_t res;
1379 ret = ctdb_control_recv(ctdb, state, mem_ctx, NULL, &res, NULL);
1380 if (ret != 0) {
1381 DEBUG(DEBUG_ERR,(__location__ " ctdb_ctrl_getrecmode_recv failed\n"));
1382 return -1;
1385 if (recmode) {
1386 *recmode = (uint32_t)res;
1389 return 0;
1392 int ctdb_ctrl_getrecmode(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct timeval timeout, uint32_t destnode, uint32_t *recmode)
1394 struct ctdb_client_control_state *state;
1396 state = ctdb_ctrl_getrecmode_send(ctdb, mem_ctx, timeout, destnode);
1397 return ctdb_ctrl_getrecmode_recv(ctdb, mem_ctx, state, recmode);
1404 set the recovery mode of a remote node
1406 int ctdb_ctrl_setrecmode(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t recmode)
1408 int ret;
1409 TDB_DATA data;
1410 int32_t res;
1412 data.dsize = sizeof(uint32_t);
1413 data.dptr = (unsigned char *)&recmode;
1415 ret = ctdb_control(ctdb, destnode, 0,
1416 CTDB_CONTROL_SET_RECMODE, 0, data,
1417 NULL, NULL, &res, &timeout, NULL);
1418 if (ret != 0 || res != 0) {
1419 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for setrecmode failed\n"));
1420 return -1;
1423 return 0;
1429 get the recovery master of a remote node
1431 struct ctdb_client_control_state *
1432 ctdb_ctrl_getrecmaster_send(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx,
1433 struct timeval timeout, uint32_t destnode)
1435 return ctdb_control_send(ctdb, destnode, 0,
1436 CTDB_CONTROL_GET_RECMASTER, 0, tdb_null,
1437 mem_ctx, &timeout, NULL);
1440 int ctdb_ctrl_getrecmaster_recv(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct ctdb_client_control_state *state, uint32_t *recmaster)
1442 int ret;
1443 int32_t res;
1445 ret = ctdb_control_recv(ctdb, state, mem_ctx, NULL, &res, NULL);
1446 if (ret != 0) {
1447 DEBUG(DEBUG_ERR,(__location__ " ctdb_ctrl_getrecmaster_recv failed\n"));
1448 return -1;
1451 if (recmaster) {
1452 *recmaster = (uint32_t)res;
1455 return 0;
1458 int ctdb_ctrl_getrecmaster(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct timeval timeout, uint32_t destnode, uint32_t *recmaster)
1460 struct ctdb_client_control_state *state;
1462 state = ctdb_ctrl_getrecmaster_send(ctdb, mem_ctx, timeout, destnode);
1463 return ctdb_ctrl_getrecmaster_recv(ctdb, mem_ctx, state, recmaster);
1468 set the recovery master of a remote node
1470 int ctdb_ctrl_setrecmaster(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t recmaster)
1472 int ret;
1473 TDB_DATA data;
1474 int32_t res;
1476 ZERO_STRUCT(data);
1477 data.dsize = sizeof(uint32_t);
1478 data.dptr = (unsigned char *)&recmaster;
1480 ret = ctdb_control(ctdb, destnode, 0,
1481 CTDB_CONTROL_SET_RECMASTER, 0, data,
1482 NULL, NULL, &res, &timeout, NULL);
1483 if (ret != 0 || res != 0) {
1484 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for setrecmaster failed\n"));
1485 return -1;
1488 return 0;
1493 get a list of databases off a remote node
1495 int ctdb_ctrl_getdbmap(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode,
1496 TALLOC_CTX *mem_ctx, struct ctdb_dbid_map **dbmap)
1498 int ret;
1499 TDB_DATA outdata;
1500 int32_t res;
1502 ret = ctdb_control(ctdb, destnode, 0,
1503 CTDB_CONTROL_GET_DBMAP, 0, tdb_null,
1504 mem_ctx, &outdata, &res, &timeout, NULL);
1505 if (ret != 0 || res != 0) {
1506 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getdbmap failed ret:%d res:%d\n", ret, res));
1507 return -1;
1510 *dbmap = (struct ctdb_dbid_map *)talloc_memdup(mem_ctx, outdata.dptr, outdata.dsize);
1511 talloc_free(outdata.dptr);
1513 return 0;
1517 get a list of nodes (vnn and flags ) from a remote node
1519 int ctdb_ctrl_getnodemap(struct ctdb_context *ctdb,
1520 struct timeval timeout, uint32_t destnode,
1521 TALLOC_CTX *mem_ctx, struct ctdb_node_map **nodemap)
1523 int ret;
1524 TDB_DATA outdata;
1525 int32_t res;
1527 ret = ctdb_control(ctdb, destnode, 0,
1528 CTDB_CONTROL_GET_NODEMAP, 0, tdb_null,
1529 mem_ctx, &outdata, &res, &timeout, NULL);
1530 if (ret == 0 && res == -1 && outdata.dsize == 0) {
1531 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getnodes failed, falling back to ipv4-only control\n"));
1532 return ctdb_ctrl_getnodemapv4(ctdb, timeout, destnode, mem_ctx, nodemap);
1534 if (ret != 0 || res != 0 || outdata.dsize == 0) {
1535 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getnodes failed ret:%d res:%d\n", ret, res));
1536 return -1;
1539 *nodemap = (struct ctdb_node_map *)talloc_memdup(mem_ctx, outdata.dptr, outdata.dsize);
1540 talloc_free(outdata.dptr);
1542 return 0;
1546 old style ipv4-only get a list of nodes (vnn and flags ) from a remote node
1548 int ctdb_ctrl_getnodemapv4(struct ctdb_context *ctdb,
1549 struct timeval timeout, uint32_t destnode,
1550 TALLOC_CTX *mem_ctx, struct ctdb_node_map **nodemap)
1552 int ret, i, len;
1553 TDB_DATA outdata;
1554 struct ctdb_node_mapv4 *nodemapv4;
1555 int32_t res;
1557 ret = ctdb_control(ctdb, destnode, 0,
1558 CTDB_CONTROL_GET_NODEMAPv4, 0, tdb_null,
1559 mem_ctx, &outdata, &res, &timeout, NULL);
1560 if (ret != 0 || res != 0 || outdata.dsize == 0) {
1561 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getnodesv4 failed ret:%d res:%d\n", ret, res));
1562 return -1;
1565 nodemapv4 = (struct ctdb_node_mapv4 *)outdata.dptr;
1567 len = offsetof(struct ctdb_node_map, nodes) + nodemapv4->num*sizeof(struct ctdb_node_and_flags);
1568 (*nodemap) = talloc_zero_size(mem_ctx, len);
1569 CTDB_NO_MEMORY(ctdb, (*nodemap));
1571 (*nodemap)->num = nodemapv4->num;
1572 for (i=0; i<nodemapv4->num; i++) {
1573 (*nodemap)->nodes[i].pnn = nodemapv4->nodes[i].pnn;
1574 (*nodemap)->nodes[i].flags = nodemapv4->nodes[i].flags;
1575 (*nodemap)->nodes[i].addr.ip = nodemapv4->nodes[i].sin;
1576 (*nodemap)->nodes[i].addr.sa.sa_family = AF_INET;
1579 talloc_free(outdata.dptr);
1581 return 0;
1585 drop the transport, reload the nodes file and restart the transport
1587 int ctdb_ctrl_reload_nodes_file(struct ctdb_context *ctdb,
1588 struct timeval timeout, uint32_t destnode)
1590 int ret;
1591 int32_t res;
1593 ret = ctdb_control(ctdb, destnode, 0,
1594 CTDB_CONTROL_RELOAD_NODES_FILE, 0, tdb_null,
1595 NULL, NULL, &res, &timeout, NULL);
1596 if (ret != 0 || res != 0) {
1597 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for reloadnodesfile failed\n"));
1598 return -1;
1601 return 0;
1606 set vnn map on a node
1608 int ctdb_ctrl_setvnnmap(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode,
1609 TALLOC_CTX *mem_ctx, struct ctdb_vnn_map *vnnmap)
1611 int ret;
1612 TDB_DATA data;
1613 int32_t res;
1614 struct ctdb_vnn_map_wire *map;
1615 size_t len;
1617 len = offsetof(struct ctdb_vnn_map_wire, map) + sizeof(uint32_t)*vnnmap->size;
1618 map = talloc_size(mem_ctx, len);
1619 CTDB_NO_MEMORY(ctdb, map);
1621 map->generation = vnnmap->generation;
1622 map->size = vnnmap->size;
1623 memcpy(map->map, vnnmap->map, sizeof(uint32_t)*map->size);
1625 data.dsize = len;
1626 data.dptr = (uint8_t *)map;
1628 ret = ctdb_control(ctdb, destnode, 0,
1629 CTDB_CONTROL_SETVNNMAP, 0, data,
1630 NULL, NULL, &res, &timeout, NULL);
1631 if (ret != 0 || res != 0) {
1632 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for setvnnmap failed\n"));
1633 return -1;
1636 talloc_free(map);
1638 return 0;
1643 async send for pull database
1645 struct ctdb_client_control_state *ctdb_ctrl_pulldb_send(
1646 struct ctdb_context *ctdb, uint32_t destnode, uint32_t dbid,
1647 uint32_t lmaster, TALLOC_CTX *mem_ctx, struct timeval timeout)
1649 TDB_DATA indata;
1650 struct ctdb_control_pulldb *pull;
1651 struct ctdb_client_control_state *state;
1653 pull = talloc(mem_ctx, struct ctdb_control_pulldb);
1654 CTDB_NO_MEMORY_NULL(ctdb, pull);
1656 pull->db_id = dbid;
1657 pull->lmaster = lmaster;
1659 indata.dsize = sizeof(struct ctdb_control_pulldb);
1660 indata.dptr = (unsigned char *)pull;
1662 state = ctdb_control_send(ctdb, destnode, 0,
1663 CTDB_CONTROL_PULL_DB, 0, indata,
1664 mem_ctx, &timeout, NULL);
1665 talloc_free(pull);
1667 return state;
1671 async recv for pull database
1673 int ctdb_ctrl_pulldb_recv(
1674 struct ctdb_context *ctdb,
1675 TALLOC_CTX *mem_ctx, struct ctdb_client_control_state *state,
1676 TDB_DATA *outdata)
1678 int ret;
1679 int32_t res;
1681 ret = ctdb_control_recv(ctdb, state, mem_ctx, outdata, &res, NULL);
1682 if ( (ret != 0) || (res != 0) ){
1683 DEBUG(DEBUG_ERR,(__location__ " ctdb_ctrl_pulldb_recv failed\n"));
1684 return -1;
1687 return 0;
1691 pull all keys and records for a specific database on a node
1693 int ctdb_ctrl_pulldb(struct ctdb_context *ctdb, uint32_t destnode,
1694 uint32_t dbid, uint32_t lmaster,
1695 TALLOC_CTX *mem_ctx, struct timeval timeout,
1696 TDB_DATA *outdata)
1698 struct ctdb_client_control_state *state;
1700 state = ctdb_ctrl_pulldb_send(ctdb, destnode, dbid, lmaster, mem_ctx,
1701 timeout);
1703 return ctdb_ctrl_pulldb_recv(ctdb, mem_ctx, state, outdata);
1708 change dmaster for all keys in the database to the new value
1710 int ctdb_ctrl_setdmaster(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode,
1711 TALLOC_CTX *mem_ctx, uint32_t dbid, uint32_t dmaster)
1713 int ret;
1714 TDB_DATA indata;
1715 int32_t res;
1717 indata.dsize = 2*sizeof(uint32_t);
1718 indata.dptr = (unsigned char *)talloc_array(mem_ctx, uint32_t, 2);
1720 ((uint32_t *)(&indata.dptr[0]))[0] = dbid;
1721 ((uint32_t *)(&indata.dptr[0]))[1] = dmaster;
1723 ret = ctdb_control(ctdb, destnode, 0,
1724 CTDB_CONTROL_SET_DMASTER, 0, indata,
1725 NULL, NULL, &res, &timeout, NULL);
1726 if (ret != 0 || res != 0) {
1727 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for setdmaster failed\n"));
1728 return -1;
1731 return 0;
1735 ping a node, return number of clients connected
1737 int ctdb_ctrl_ping(struct ctdb_context *ctdb, uint32_t destnode)
1739 int ret;
1740 int32_t res;
1742 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_PING, 0,
1743 tdb_null, NULL, NULL, &res, NULL, NULL);
1744 if (ret != 0) {
1745 return -1;
1747 return res;
1750 int ctdb_ctrl_get_runstate(struct ctdb_context *ctdb,
1751 struct timeval timeout,
1752 uint32_t destnode,
1753 uint32_t *runstate)
1755 TDB_DATA outdata;
1756 int32_t res;
1757 int ret;
1759 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_GET_RUNSTATE, 0,
1760 tdb_null, ctdb, &outdata, &res, &timeout, NULL);
1761 if (ret != 0 || res != 0) {
1762 DEBUG(DEBUG_ERR,("ctdb_control for get_runstate failed\n"));
1763 return ret != 0 ? ret : res;
1766 if (outdata.dsize != sizeof(uint32_t)) {
1767 DEBUG(DEBUG_ERR,("Invalid return data in get_runstate\n"));
1768 talloc_free(outdata.dptr);
1769 return -1;
1772 if (runstate != NULL) {
1773 *runstate = *(uint32_t *)outdata.dptr;
1775 talloc_free(outdata.dptr);
1777 return 0;
1781 find the real path to a ltdb
1783 int ctdb_ctrl_getdbpath(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t dbid, TALLOC_CTX *mem_ctx,
1784 const char **path)
1786 int ret;
1787 int32_t res;
1788 TDB_DATA data;
1790 data.dptr = (uint8_t *)&dbid;
1791 data.dsize = sizeof(dbid);
1793 ret = ctdb_control(ctdb, destnode, 0,
1794 CTDB_CONTROL_GETDBPATH, 0, data,
1795 mem_ctx, &data, &res, &timeout, NULL);
1796 if (ret != 0 || res != 0) {
1797 return -1;
1800 (*path) = talloc_strndup(mem_ctx, (const char *)data.dptr, data.dsize);
1801 if ((*path) == NULL) {
1802 return -1;
1805 talloc_free(data.dptr);
1807 return 0;
1811 find the name of a db
1813 int ctdb_ctrl_getdbname(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t dbid, TALLOC_CTX *mem_ctx,
1814 const char **name)
1816 int ret;
1817 int32_t res;
1818 TDB_DATA data;
1820 data.dptr = (uint8_t *)&dbid;
1821 data.dsize = sizeof(dbid);
1823 ret = ctdb_control(ctdb, destnode, 0,
1824 CTDB_CONTROL_GET_DBNAME, 0, data,
1825 mem_ctx, &data, &res, &timeout, NULL);
1826 if (ret != 0 || res != 0) {
1827 return -1;
1830 (*name) = talloc_strndup(mem_ctx, (const char *)data.dptr, data.dsize);
1831 if ((*name) == NULL) {
1832 return -1;
1835 talloc_free(data.dptr);
1837 return 0;
1841 get the health status of a db
1843 int ctdb_ctrl_getdbhealth(struct ctdb_context *ctdb,
1844 struct timeval timeout,
1845 uint32_t destnode,
1846 uint32_t dbid, TALLOC_CTX *mem_ctx,
1847 const char **reason)
1849 int ret;
1850 int32_t res;
1851 TDB_DATA data;
1853 data.dptr = (uint8_t *)&dbid;
1854 data.dsize = sizeof(dbid);
1856 ret = ctdb_control(ctdb, destnode, 0,
1857 CTDB_CONTROL_DB_GET_HEALTH, 0, data,
1858 mem_ctx, &data, &res, &timeout, NULL);
1859 if (ret != 0 || res != 0) {
1860 return -1;
1863 if (data.dsize == 0) {
1864 (*reason) = NULL;
1865 return 0;
1868 (*reason) = talloc_strndup(mem_ctx, (const char *)data.dptr, data.dsize);
1869 if ((*reason) == NULL) {
1870 return -1;
1873 talloc_free(data.dptr);
1875 return 0;
1879 * get db sequence number
1881 int ctdb_ctrl_getdbseqnum(struct ctdb_context *ctdb, struct timeval timeout,
1882 uint32_t destnode, uint32_t dbid, uint64_t *seqnum)
1884 int ret;
1885 int32_t res;
1886 TDB_DATA data, outdata;
1888 data.dptr = (uint8_t *)&dbid;
1889 data.dsize = sizeof(uint64_t); /* This is just wrong */
1891 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_GET_DB_SEQNUM,
1892 0, data, ctdb, &outdata, &res, &timeout, NULL);
1893 if (ret != 0 || res != 0) {
1894 DEBUG(DEBUG_ERR,("ctdb_control for getdbesqnum failed\n"));
1895 return -1;
1898 if (outdata.dsize != sizeof(uint64_t)) {
1899 DEBUG(DEBUG_ERR,("Invalid return data in get_dbseqnum\n"));
1900 talloc_free(outdata.dptr);
1901 return -1;
1904 if (seqnum != NULL) {
1905 *seqnum = *(uint64_t *)outdata.dptr;
1907 talloc_free(outdata.dptr);
1909 return 0;
1913 create a database
1915 int ctdb_ctrl_createdb(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode,
1916 TALLOC_CTX *mem_ctx, const char *name, bool persistent)
1918 int ret;
1919 int32_t res;
1920 TDB_DATA data;
1921 uint64_t tdb_flags = 0;
1923 data.dptr = discard_const(name);
1924 data.dsize = strlen(name)+1;
1926 /* Make sure that volatile databases use jenkins hash */
1927 if (!persistent) {
1928 tdb_flags = TDB_INCOMPATIBLE_HASH;
1931 ret = ctdb_control(ctdb, destnode, tdb_flags,
1932 persistent?CTDB_CONTROL_DB_ATTACH_PERSISTENT:CTDB_CONTROL_DB_ATTACH,
1933 0, data,
1934 mem_ctx, &data, &res, &timeout, NULL);
1936 if (ret != 0 || res != 0) {
1937 return -1;
1940 return 0;
1944 get debug level on a node
1946 int ctdb_ctrl_get_debuglevel(struct ctdb_context *ctdb, uint32_t destnode, int32_t *level)
1948 int ret;
1949 int32_t res;
1950 TDB_DATA data;
1952 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_GET_DEBUG, 0, tdb_null,
1953 ctdb, &data, &res, NULL, NULL);
1954 if (ret != 0 || res != 0) {
1955 return -1;
1957 if (data.dsize != sizeof(int32_t)) {
1958 DEBUG(DEBUG_ERR,("Bad control reply size in ctdb_get_debuglevel (got %u)\n",
1959 (unsigned)data.dsize));
1960 return -1;
1962 *level = *(int32_t *)data.dptr;
1963 talloc_free(data.dptr);
1964 return 0;
1968 set debug level on a node
1970 int ctdb_ctrl_set_debuglevel(struct ctdb_context *ctdb, uint32_t destnode, int32_t level)
1972 int ret;
1973 int32_t res;
1974 TDB_DATA data;
1976 data.dptr = (uint8_t *)&level;
1977 data.dsize = sizeof(level);
1979 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_SET_DEBUG, 0, data,
1980 NULL, NULL, &res, NULL, NULL);
1981 if (ret != 0 || res != 0) {
1982 return -1;
1984 return 0;
1989 get a list of connected nodes
1991 uint32_t *ctdb_get_connected_nodes(struct ctdb_context *ctdb,
1992 struct timeval timeout,
1993 TALLOC_CTX *mem_ctx,
1994 uint32_t *num_nodes)
1996 struct ctdb_node_map *map=NULL;
1997 int ret, i;
1998 uint32_t *nodes;
2000 *num_nodes = 0;
2002 ret = ctdb_ctrl_getnodemap(ctdb, timeout, CTDB_CURRENT_NODE, mem_ctx, &map);
2003 if (ret != 0) {
2004 return NULL;
2007 nodes = talloc_array(mem_ctx, uint32_t, map->num);
2008 if (nodes == NULL) {
2009 return NULL;
2012 for (i=0;i<map->num;i++) {
2013 if (!(map->nodes[i].flags & NODE_FLAGS_DISCONNECTED)) {
2014 nodes[*num_nodes] = map->nodes[i].pnn;
2015 (*num_nodes)++;
2019 return nodes;
2024 reset remote status
2026 int ctdb_statistics_reset(struct ctdb_context *ctdb, uint32_t destnode)
2028 int ret;
2029 int32_t res;
2031 ret = ctdb_control(ctdb, destnode, 0,
2032 CTDB_CONTROL_STATISTICS_RESET, 0, tdb_null,
2033 NULL, NULL, &res, NULL, NULL);
2034 if (ret != 0 || res != 0) {
2035 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for reset statistics failed\n"));
2036 return -1;
2038 return 0;
2042 attach to a specific database - client call
2044 struct ctdb_db_context *ctdb_attach(struct ctdb_context *ctdb,
2045 struct timeval timeout,
2046 const char *name,
2047 bool persistent,
2048 uint32_t tdb_flags)
2050 struct ctdb_db_context *ctdb_db;
2051 TDB_DATA data;
2052 int ret;
2053 int32_t res;
2055 ctdb_db = ctdb_db_handle(ctdb, name);
2056 if (ctdb_db) {
2057 return ctdb_db;
2060 ctdb_db = talloc_zero(ctdb, struct ctdb_db_context);
2061 CTDB_NO_MEMORY_NULL(ctdb, ctdb_db);
2063 ctdb_db->ctdb = ctdb;
2064 ctdb_db->db_name = talloc_strdup(ctdb_db, name);
2065 CTDB_NO_MEMORY_NULL(ctdb, ctdb_db->db_name);
2067 data.dptr = discard_const(name);
2068 data.dsize = strlen(name)+1;
2070 /* CTDB has switched to using jenkins hash for volatile databases.
2071 * Even if tdb_flags do not explicitly mention TDB_INCOMPATIBLE_HASH,
2072 * always set it.
2074 if (!persistent) {
2075 tdb_flags |= TDB_INCOMPATIBLE_HASH;
2078 /* tell ctdb daemon to attach */
2079 ret = ctdb_control(ctdb, CTDB_CURRENT_NODE, tdb_flags,
2080 persistent?CTDB_CONTROL_DB_ATTACH_PERSISTENT:CTDB_CONTROL_DB_ATTACH,
2081 0, data, ctdb_db, &data, &res, NULL, NULL);
2082 if (ret != 0 || res != 0 || data.dsize != sizeof(uint32_t)) {
2083 DEBUG(DEBUG_ERR,("Failed to attach to database '%s'\n", name));
2084 talloc_free(ctdb_db);
2085 return NULL;
2088 ctdb_db->db_id = *(uint32_t *)data.dptr;
2089 talloc_free(data.dptr);
2091 ret = ctdb_ctrl_getdbpath(ctdb, timeout, CTDB_CURRENT_NODE, ctdb_db->db_id, ctdb_db, &ctdb_db->db_path);
2092 if (ret != 0) {
2093 DEBUG(DEBUG_ERR,("Failed to get dbpath for database '%s'\n", name));
2094 talloc_free(ctdb_db);
2095 return NULL;
2098 tdb_flags = persistent?TDB_DEFAULT:TDB_NOSYNC;
2099 if (ctdb->valgrinding) {
2100 tdb_flags |= TDB_NOMMAP;
2102 tdb_flags |= TDB_DISALLOW_NESTING;
2104 ctdb_db->ltdb = tdb_wrap_open(ctdb_db, ctdb_db->db_path, 0, tdb_flags,
2105 O_RDWR, 0);
2106 if (ctdb_db->ltdb == NULL) {
2107 ctdb_set_error(ctdb, "Failed to open tdb '%s'\n", ctdb_db->db_path);
2108 talloc_free(ctdb_db);
2109 return NULL;
2112 ctdb_db->persistent = persistent;
2114 DLIST_ADD(ctdb->db_list, ctdb_db);
2116 /* add well known functions */
2117 ctdb_set_call(ctdb_db, ctdb_null_func, CTDB_NULL_FUNC);
2118 ctdb_set_call(ctdb_db, ctdb_fetch_func, CTDB_FETCH_FUNC);
2119 ctdb_set_call(ctdb_db, ctdb_fetch_with_header_func, CTDB_FETCH_WITH_HEADER_FUNC);
2121 return ctdb_db;
2125 * detach from a specific database - client call
2127 int ctdb_detach(struct ctdb_context *ctdb, uint32_t db_id)
2129 int ret;
2130 int32_t status;
2131 TDB_DATA data;
2133 data.dsize = sizeof(db_id);
2134 data.dptr = (uint8_t *)&db_id;
2136 ret = ctdb_control(ctdb, CTDB_CURRENT_NODE, 0, CTDB_CONTROL_DB_DETACH,
2137 0, data, NULL, NULL, &status, NULL, NULL);
2138 if (ret != 0 || status != 0) {
2139 return -1;
2141 return 0;
2145 setup a call for a database
2147 int ctdb_set_call(struct ctdb_db_context *ctdb_db, ctdb_fn_t fn, uint32_t id)
2149 struct ctdb_registered_call *call;
2151 #if 0
2152 TDB_DATA data;
2153 int32_t status;
2154 struct ctdb_control_set_call c;
2155 int ret;
2157 /* this is no longer valid with the separate daemon architecture */
2158 c.db_id = ctdb_db->db_id;
2159 c.fn = fn;
2160 c.id = id;
2162 data.dptr = (uint8_t *)&c;
2163 data.dsize = sizeof(c);
2165 ret = ctdb_control(ctdb_db->ctdb, CTDB_CURRENT_NODE, 0, CTDB_CONTROL_SET_CALL, 0,
2166 data, NULL, NULL, &status, NULL, NULL);
2167 if (ret != 0 || status != 0) {
2168 DEBUG(DEBUG_ERR,("ctdb_set_call failed for call %u\n", id));
2169 return -1;
2171 #endif
2173 /* also register locally */
2174 call = talloc(ctdb_db, struct ctdb_registered_call);
2175 call->fn = fn;
2176 call->id = id;
2178 DLIST_ADD(ctdb_db->calls, call);
2179 return 0;
2183 struct traverse_state {
2184 bool done;
2185 uint32_t count;
2186 ctdb_traverse_func fn;
2187 void *private_data;
2188 bool listemptyrecords;
2192 called on each key during a ctdb_traverse
2194 static void traverse_handler(struct ctdb_context *ctdb, uint64_t srvid, TDB_DATA data, void *p)
2196 struct traverse_state *state = (struct traverse_state *)p;
2197 struct ctdb_rec_data *d = (struct ctdb_rec_data *)data.dptr;
2198 TDB_DATA key;
2200 if (data.dsize < sizeof(uint32_t) ||
2201 d->length != data.dsize) {
2202 DEBUG(DEBUG_ERR,("Bad data size %u in traverse_handler\n", (unsigned)data.dsize));
2203 state->done = true;
2204 return;
2207 key.dsize = d->keylen;
2208 key.dptr = &d->data[0];
2209 data.dsize = d->datalen;
2210 data.dptr = &d->data[d->keylen];
2212 if (key.dsize == 0 && data.dsize == 0) {
2213 /* end of traverse */
2214 state->done = true;
2215 return;
2218 if (!state->listemptyrecords &&
2219 data.dsize == sizeof(struct ctdb_ltdb_header))
2221 /* empty records are deleted records in ctdb */
2222 return;
2225 if (state->fn(ctdb, key, data, state->private_data) != 0) {
2226 state->done = true;
2229 state->count++;
2233 * start a cluster wide traverse, calling the supplied fn on each record
2234 * return the number of records traversed, or -1 on error
2236 * Extendet variant with a flag to signal whether empty records should
2237 * be listed.
2239 static int ctdb_traverse_ext(struct ctdb_db_context *ctdb_db,
2240 ctdb_traverse_func fn,
2241 bool withemptyrecords,
2242 void *private_data)
2244 TDB_DATA data;
2245 struct ctdb_traverse_start_ext t;
2246 int32_t status;
2247 int ret;
2248 uint64_t srvid = (getpid() | 0xFLL<<60);
2249 struct traverse_state state;
2251 state.done = false;
2252 state.count = 0;
2253 state.private_data = private_data;
2254 state.fn = fn;
2255 state.listemptyrecords = withemptyrecords;
2257 ret = ctdb_client_set_message_handler(ctdb_db->ctdb, srvid, traverse_handler, &state);
2258 if (ret != 0) {
2259 DEBUG(DEBUG_ERR,("Failed to setup traverse handler\n"));
2260 return -1;
2263 t.db_id = ctdb_db->db_id;
2264 t.srvid = srvid;
2265 t.reqid = 0;
2266 t.withemptyrecords = withemptyrecords;
2268 data.dptr = (uint8_t *)&t;
2269 data.dsize = sizeof(t);
2271 ret = ctdb_control(ctdb_db->ctdb, CTDB_CURRENT_NODE, 0, CTDB_CONTROL_TRAVERSE_START_EXT, 0,
2272 data, NULL, NULL, &status, NULL, NULL);
2273 if (ret != 0 || status != 0) {
2274 DEBUG(DEBUG_ERR,("ctdb_traverse_all failed\n"));
2275 ctdb_client_remove_message_handler(ctdb_db->ctdb, srvid, &state);
2276 return -1;
2279 while (!state.done) {
2280 event_loop_once(ctdb_db->ctdb->ev);
2283 ret = ctdb_client_remove_message_handler(ctdb_db->ctdb, srvid, &state);
2284 if (ret != 0) {
2285 DEBUG(DEBUG_ERR,("Failed to remove ctdb_traverse handler\n"));
2286 return -1;
2289 return state.count;
2293 * start a cluster wide traverse, calling the supplied fn on each record
2294 * return the number of records traversed, or -1 on error
2296 * Standard version which does not list the empty records:
2297 * These are considered deleted.
2299 int ctdb_traverse(struct ctdb_db_context *ctdb_db, ctdb_traverse_func fn, void *private_data)
2301 return ctdb_traverse_ext(ctdb_db, fn, false, private_data);
2304 #define ISASCII(x) (isprint(x) && !strchr("\"\\", (x)))
2306 called on each key during a catdb
2308 int ctdb_dumpdb_record(struct ctdb_context *ctdb, TDB_DATA key, TDB_DATA data, void *p)
2310 int i;
2311 struct ctdb_dump_db_context *c = (struct ctdb_dump_db_context *)p;
2312 FILE *f = c->f;
2313 struct ctdb_ltdb_header *h = (struct ctdb_ltdb_header *)data.dptr;
2315 fprintf(f, "key(%u) = \"", (unsigned)key.dsize);
2316 for (i=0;i<key.dsize;i++) {
2317 if (ISASCII(key.dptr[i])) {
2318 fprintf(f, "%c", key.dptr[i]);
2319 } else {
2320 fprintf(f, "\\%02X", key.dptr[i]);
2323 fprintf(f, "\"\n");
2325 fprintf(f, "dmaster: %u\n", h->dmaster);
2326 fprintf(f, "rsn: %llu\n", (unsigned long long)h->rsn);
2328 if (c->printlmaster && ctdb->vnn_map != NULL) {
2329 fprintf(f, "lmaster: %u\n", ctdb_lmaster(ctdb, &key));
2332 if (c->printhash) {
2333 fprintf(f, "hash: 0x%08x\n", ctdb_hash(&key));
2336 if (c->printrecordflags) {
2337 fprintf(f, "flags: 0x%08x", h->flags);
2338 if (h->flags & CTDB_REC_FLAG_MIGRATED_WITH_DATA) printf(" MIGRATED_WITH_DATA");
2339 if (h->flags & CTDB_REC_FLAG_VACUUM_MIGRATED) printf(" VACUUM_MIGRATED");
2340 if (h->flags & CTDB_REC_FLAG_AUTOMATIC) printf(" AUTOMATIC");
2341 if (h->flags & CTDB_REC_RO_HAVE_DELEGATIONS) printf(" RO_HAVE_DELEGATIONS");
2342 if (h->flags & CTDB_REC_RO_HAVE_READONLY) printf(" RO_HAVE_READONLY");
2343 if (h->flags & CTDB_REC_RO_REVOKING_READONLY) printf(" RO_REVOKING_READONLY");
2344 if (h->flags & CTDB_REC_RO_REVOKE_COMPLETE) printf(" RO_REVOKE_COMPLETE");
2345 fprintf(f, "\n");
2348 if (c->printdatasize) {
2349 fprintf(f, "data size: %u\n", (unsigned)data.dsize);
2350 } else {
2351 fprintf(f, "data(%u) = \"", (unsigned)(data.dsize - sizeof(*h)));
2352 for (i=sizeof(*h);i<data.dsize;i++) {
2353 if (ISASCII(data.dptr[i])) {
2354 fprintf(f, "%c", data.dptr[i]);
2355 } else {
2356 fprintf(f, "\\%02X", data.dptr[i]);
2359 fprintf(f, "\"\n");
2362 fprintf(f, "\n");
2364 return 0;
2368 convenience function to list all keys to stdout
2370 int ctdb_dump_db(struct ctdb_db_context *ctdb_db,
2371 struct ctdb_dump_db_context *ctx)
2373 return ctdb_traverse_ext(ctdb_db, ctdb_dumpdb_record,
2374 ctx->printemptyrecords, ctx);
2378 get the pid of a ctdb daemon
2380 int ctdb_ctrl_getpid(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t *pid)
2382 int ret;
2383 int32_t res;
2385 ret = ctdb_control(ctdb, destnode, 0,
2386 CTDB_CONTROL_GET_PID, 0, tdb_null,
2387 NULL, NULL, &res, &timeout, NULL);
2388 if (ret != 0) {
2389 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getpid failed\n"));
2390 return -1;
2393 *pid = res;
2395 return 0;
2400 async freeze send control
2402 struct ctdb_client_control_state *
2403 ctdb_ctrl_freeze_send(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct timeval timeout, uint32_t destnode, uint32_t priority)
2405 return ctdb_control_send(ctdb, destnode, priority,
2406 CTDB_CONTROL_FREEZE, 0, tdb_null,
2407 mem_ctx, &timeout, NULL);
2411 async freeze recv control
2413 int ctdb_ctrl_freeze_recv(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct ctdb_client_control_state *state)
2415 int ret;
2416 int32_t res;
2418 ret = ctdb_control_recv(ctdb, state, mem_ctx, NULL, &res, NULL);
2419 if ( (ret != 0) || (res != 0) ){
2420 DEBUG(DEBUG_ERR,(__location__ " ctdb_ctrl_freeze_recv failed\n"));
2421 return -1;
2424 return 0;
2428 freeze databases of a certain priority
2430 int ctdb_ctrl_freeze_priority(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t priority)
2432 TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
2433 struct ctdb_client_control_state *state;
2434 int ret;
2436 state = ctdb_ctrl_freeze_send(ctdb, tmp_ctx, timeout, destnode, priority);
2437 ret = ctdb_ctrl_freeze_recv(ctdb, tmp_ctx, state);
2438 talloc_free(tmp_ctx);
2440 return ret;
2443 /* Freeze all databases */
2444 int ctdb_ctrl_freeze(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode)
2446 int i;
2448 for (i=1; i<=NUM_DB_PRIORITIES; i++) {
2449 if (ctdb_ctrl_freeze_priority(ctdb, timeout, destnode, i) != 0) {
2450 return -1;
2453 return 0;
2457 thaw databases of a certain priority
2459 int ctdb_ctrl_thaw_priority(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t priority)
2461 int ret;
2462 int32_t res;
2464 ret = ctdb_control(ctdb, destnode, priority,
2465 CTDB_CONTROL_THAW, 0, tdb_null,
2466 NULL, NULL, &res, &timeout, NULL);
2467 if (ret != 0 || res != 0) {
2468 DEBUG(DEBUG_ERR,(__location__ " ctdb_control thaw failed\n"));
2469 return -1;
2472 return 0;
/*
  thaw all databases

  Implemented as a thaw request with priority 0.
 */
int ctdb_ctrl_thaw(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode)
{
	const uint32_t all_priorities = 0;

	return ctdb_ctrl_thaw_priority(ctdb, timeout, destnode, all_priorities);
}
2482 get pnn of a node, or -1
2484 int ctdb_ctrl_getpnn(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode)
2486 int ret;
2487 int32_t res;
2489 ret = ctdb_control(ctdb, destnode, 0,
2490 CTDB_CONTROL_GET_PNN, 0, tdb_null,
2491 NULL, NULL, &res, &timeout, NULL);
2492 if (ret != 0) {
2493 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getpnn failed\n"));
2494 return -1;
2497 return res;
2501 get the monitoring mode of a remote node
2503 int ctdb_ctrl_getmonmode(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t *monmode)
2505 int ret;
2506 int32_t res;
2508 ret = ctdb_control(ctdb, destnode, 0,
2509 CTDB_CONTROL_GET_MONMODE, 0, tdb_null,
2510 NULL, NULL, &res, &timeout, NULL);
2511 if (ret != 0) {
2512 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getmonmode failed\n"));
2513 return -1;
2516 *monmode = res;
2518 return 0;
2523 set the monitoring mode of a remote node to active
2525 int ctdb_ctrl_enable_monmode(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode)
2527 int ret;
2530 ret = ctdb_control(ctdb, destnode, 0,
2531 CTDB_CONTROL_ENABLE_MONITOR, 0, tdb_null,
2532 NULL, NULL,NULL, &timeout, NULL);
2533 if (ret != 0) {
2534 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for enable_monitor failed\n"));
2535 return -1;
2540 return 0;
2544 set the monitoring mode of a remote node to disable
2546 int ctdb_ctrl_disable_monmode(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode)
2548 int ret;
2551 ret = ctdb_control(ctdb, destnode, 0,
2552 CTDB_CONTROL_DISABLE_MONITOR, 0, tdb_null,
2553 NULL, NULL, NULL, &timeout, NULL);
2554 if (ret != 0) {
2555 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for disable_monitor failed\n"));
2556 return -1;
2561 return 0;
2567 sent to a node to make it take over an ip address
2569 int ctdb_ctrl_takeover_ip(struct ctdb_context *ctdb, struct timeval timeout,
2570 uint32_t destnode, struct ctdb_public_ip *ip)
2572 TDB_DATA data;
2573 struct ctdb_public_ipv4 ipv4;
2574 int ret;
2575 int32_t res;
2577 if (ip->addr.sa.sa_family == AF_INET) {
2578 ipv4.pnn = ip->pnn;
2579 ipv4.sin = ip->addr.ip;
2581 data.dsize = sizeof(ipv4);
2582 data.dptr = (uint8_t *)&ipv4;
2584 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_TAKEOVER_IPv4, 0, data, NULL,
2585 NULL, &res, &timeout, NULL);
2586 } else {
2587 data.dsize = sizeof(*ip);
2588 data.dptr = (uint8_t *)ip;
2590 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_TAKEOVER_IP, 0, data, NULL,
2591 NULL, &res, &timeout, NULL);
2594 if (ret != 0 || res != 0) {
2595 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for takeover_ip failed\n"));
2596 return -1;
2599 return 0;
2604 sent to a node to make it release an ip address
2606 int ctdb_ctrl_release_ip(struct ctdb_context *ctdb, struct timeval timeout,
2607 uint32_t destnode, struct ctdb_public_ip *ip)
2609 TDB_DATA data;
2610 struct ctdb_public_ipv4 ipv4;
2611 int ret;
2612 int32_t res;
2614 if (ip->addr.sa.sa_family == AF_INET) {
2615 ipv4.pnn = ip->pnn;
2616 ipv4.sin = ip->addr.ip;
2618 data.dsize = sizeof(ipv4);
2619 data.dptr = (uint8_t *)&ipv4;
2621 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_RELEASE_IPv4, 0, data, NULL,
2622 NULL, &res, &timeout, NULL);
2623 } else {
2624 data.dsize = sizeof(*ip);
2625 data.dptr = (uint8_t *)ip;
2627 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_RELEASE_IP, 0, data, NULL,
2628 NULL, &res, &timeout, NULL);
2631 if (ret != 0 || res != 0) {
2632 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for release_ip failed\n"));
2633 return -1;
2636 return 0;
2641 get a tunable
2643 int ctdb_ctrl_get_tunable(struct ctdb_context *ctdb,
2644 struct timeval timeout,
2645 uint32_t destnode,
2646 const char *name, uint32_t *value)
2648 struct ctdb_control_get_tunable *t;
2649 TDB_DATA data, outdata;
2650 int32_t res;
2651 int ret;
2653 data.dsize = offsetof(struct ctdb_control_get_tunable, name) + strlen(name) + 1;
2654 data.dptr = talloc_size(ctdb, data.dsize);
2655 CTDB_NO_MEMORY(ctdb, data.dptr);
2657 t = (struct ctdb_control_get_tunable *)data.dptr;
2658 t->length = strlen(name)+1;
2659 memcpy(t->name, name, t->length);
2661 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_GET_TUNABLE, 0, data, ctdb,
2662 &outdata, &res, &timeout, NULL);
2663 talloc_free(data.dptr);
2664 if (ret != 0 || res != 0) {
2665 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get_tunable failed\n"));
2666 return ret != 0 ? ret : res;
2669 if (outdata.dsize != sizeof(uint32_t)) {
2670 DEBUG(DEBUG_ERR,("Invalid return data in get_tunable\n"));
2671 talloc_free(outdata.dptr);
2672 return -1;
2675 *value = *(uint32_t *)outdata.dptr;
2676 talloc_free(outdata.dptr);
2678 return 0;
2682 set a tunable
2684 int ctdb_ctrl_set_tunable(struct ctdb_context *ctdb,
2685 struct timeval timeout,
2686 uint32_t destnode,
2687 const char *name, uint32_t value)
2689 struct ctdb_control_set_tunable *t;
2690 TDB_DATA data;
2691 int32_t res;
2692 int ret;
2694 data.dsize = offsetof(struct ctdb_control_set_tunable, name) + strlen(name) + 1;
2695 data.dptr = talloc_size(ctdb, data.dsize);
2696 CTDB_NO_MEMORY(ctdb, data.dptr);
2698 t = (struct ctdb_control_set_tunable *)data.dptr;
2699 t->length = strlen(name)+1;
2700 memcpy(t->name, name, t->length);
2701 t->value = value;
2703 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_SET_TUNABLE, 0, data, NULL,
2704 NULL, &res, &timeout, NULL);
2705 talloc_free(data.dptr);
2706 if (ret != 0 || res != 0) {
2707 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set_tunable failed\n"));
2708 return -1;
2711 return 0;
2715 list tunables
2717 int ctdb_ctrl_list_tunables(struct ctdb_context *ctdb,
2718 struct timeval timeout,
2719 uint32_t destnode,
2720 TALLOC_CTX *mem_ctx,
2721 const char ***list, uint32_t *count)
2723 TDB_DATA outdata;
2724 int32_t res;
2725 int ret;
2726 struct ctdb_control_list_tunable *t;
2727 char *p, *s, *ptr;
2729 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_LIST_TUNABLES, 0, tdb_null,
2730 mem_ctx, &outdata, &res, &timeout, NULL);
2731 if (ret != 0 || res != 0) {
2732 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for list_tunables failed\n"));
2733 return -1;
2736 t = (struct ctdb_control_list_tunable *)outdata.dptr;
2737 if (outdata.dsize < offsetof(struct ctdb_control_list_tunable, data) ||
2738 t->length > outdata.dsize-offsetof(struct ctdb_control_list_tunable, data)) {
2739 DEBUG(DEBUG_ERR,("Invalid data in list_tunables reply\n"));
2740 talloc_free(outdata.dptr);
2741 return -1;
2744 p = talloc_strndup(mem_ctx, (char *)t->data, t->length);
2745 CTDB_NO_MEMORY(ctdb, p);
2747 talloc_free(outdata.dptr);
2749 (*list) = NULL;
2750 (*count) = 0;
2752 for (s=strtok_r(p, ":", &ptr); s; s=strtok_r(NULL, ":", &ptr)) {
2753 (*list) = talloc_realloc(mem_ctx, *list, const char *, 1+(*count));
2754 CTDB_NO_MEMORY(ctdb, *list);
2755 (*list)[*count] = talloc_strdup(*list, s);
2756 CTDB_NO_MEMORY(ctdb, (*list)[*count]);
2757 (*count)++;
2760 talloc_free(p);
2762 return 0;
2766 int ctdb_ctrl_get_public_ips_flags(struct ctdb_context *ctdb,
2767 struct timeval timeout, uint32_t destnode,
2768 TALLOC_CTX *mem_ctx,
2769 uint32_t flags,
2770 struct ctdb_all_public_ips **ips)
2772 int ret;
2773 TDB_DATA outdata;
2774 int32_t res;
2776 ret = ctdb_control(ctdb, destnode, 0,
2777 CTDB_CONTROL_GET_PUBLIC_IPS, flags, tdb_null,
2778 mem_ctx, &outdata, &res, &timeout, NULL);
2779 if (ret == 0 && res == -1) {
2780 DEBUG(DEBUG_ERR,(__location__ " ctdb_control to get public ips failed, falling back to ipv4-only version\n"));
2781 return ctdb_ctrl_get_public_ipsv4(ctdb, timeout, destnode, mem_ctx, ips);
2783 if (ret != 0 || res != 0) {
2784 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getpublicips failed ret:%d res:%d\n", ret, res));
2785 return -1;
2788 *ips = (struct ctdb_all_public_ips *)talloc_memdup(mem_ctx, outdata.dptr, outdata.dsize);
2789 talloc_free(outdata.dptr);
2791 return 0;
2794 int ctdb_ctrl_get_public_ips(struct ctdb_context *ctdb,
2795 struct timeval timeout, uint32_t destnode,
2796 TALLOC_CTX *mem_ctx,
2797 struct ctdb_all_public_ips **ips)
2799 return ctdb_ctrl_get_public_ips_flags(ctdb, timeout,
2800 destnode, mem_ctx,
2801 0, ips);
2804 int ctdb_ctrl_get_public_ipsv4(struct ctdb_context *ctdb,
2805 struct timeval timeout, uint32_t destnode,
2806 TALLOC_CTX *mem_ctx, struct ctdb_all_public_ips **ips)
2808 int ret, i, len;
2809 TDB_DATA outdata;
2810 int32_t res;
2811 struct ctdb_all_public_ipsv4 *ipsv4;
2813 ret = ctdb_control(ctdb, destnode, 0,
2814 CTDB_CONTROL_GET_PUBLIC_IPSv4, 0, tdb_null,
2815 mem_ctx, &outdata, &res, &timeout, NULL);
2816 if (ret != 0 || res != 0) {
2817 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getpublicips failed\n"));
2818 return -1;
2821 ipsv4 = (struct ctdb_all_public_ipsv4 *)outdata.dptr;
2822 len = offsetof(struct ctdb_all_public_ips, ips) +
2823 ipsv4->num*sizeof(struct ctdb_public_ip);
2824 *ips = talloc_zero_size(mem_ctx, len);
2825 CTDB_NO_MEMORY(ctdb, *ips);
2826 (*ips)->num = ipsv4->num;
2827 for (i=0; i<ipsv4->num; i++) {
2828 (*ips)->ips[i].pnn = ipsv4->ips[i].pnn;
2829 (*ips)->ips[i].addr.ip = ipsv4->ips[i].sin;
2832 talloc_free(outdata.dptr);
2834 return 0;
2837 int ctdb_ctrl_get_public_ip_info(struct ctdb_context *ctdb,
2838 struct timeval timeout, uint32_t destnode,
2839 TALLOC_CTX *mem_ctx,
2840 const ctdb_sock_addr *addr,
2841 struct ctdb_control_public_ip_info **_info)
2843 int ret;
2844 TDB_DATA indata;
2845 TDB_DATA outdata;
2846 int32_t res;
2847 struct ctdb_control_public_ip_info *info;
2848 uint32_t len;
2849 uint32_t i;
2851 indata.dptr = discard_const_p(uint8_t, addr);
2852 indata.dsize = sizeof(*addr);
2854 ret = ctdb_control(ctdb, destnode, 0,
2855 CTDB_CONTROL_GET_PUBLIC_IP_INFO, 0, indata,
2856 mem_ctx, &outdata, &res, &timeout, NULL);
2857 if (ret != 0 || res != 0) {
2858 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get public ip info "
2859 "failed ret:%d res:%d\n",
2860 ret, res));
2861 return -1;
2864 len = offsetof(struct ctdb_control_public_ip_info, ifaces);
2865 if (len > outdata.dsize) {
2866 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get public ip info "
2867 "returned invalid data with size %u > %u\n",
2868 (unsigned int)outdata.dsize,
2869 (unsigned int)len));
2870 dump_data(DEBUG_DEBUG, outdata.dptr, outdata.dsize);
2871 return -1;
2874 info = (struct ctdb_control_public_ip_info *)outdata.dptr;
2875 len += info->num*sizeof(struct ctdb_control_iface_info);
2877 if (len > outdata.dsize) {
2878 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get public ip info "
2879 "returned invalid data with size %u > %u\n",
2880 (unsigned int)outdata.dsize,
2881 (unsigned int)len));
2882 dump_data(DEBUG_DEBUG, outdata.dptr, outdata.dsize);
2883 return -1;
2886 /* make sure we null terminate the returned strings */
2887 for (i=0; i < info->num; i++) {
2888 info->ifaces[i].name[CTDB_IFACE_SIZE] = '\0';
2891 *_info = (struct ctdb_control_public_ip_info *)talloc_memdup(mem_ctx,
2892 outdata.dptr,
2893 outdata.dsize);
2894 talloc_free(outdata.dptr);
2895 if (*_info == NULL) {
2896 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get public ip info "
2897 "talloc_memdup size %u failed\n",
2898 (unsigned int)outdata.dsize));
2899 return -1;
2902 return 0;
2905 int ctdb_ctrl_get_ifaces(struct ctdb_context *ctdb,
2906 struct timeval timeout, uint32_t destnode,
2907 TALLOC_CTX *mem_ctx,
2908 struct ctdb_control_get_ifaces **_ifaces)
2910 int ret;
2911 TDB_DATA outdata;
2912 int32_t res;
2913 struct ctdb_control_get_ifaces *ifaces;
2914 uint32_t len;
2915 uint32_t i;
2917 ret = ctdb_control(ctdb, destnode, 0,
2918 CTDB_CONTROL_GET_IFACES, 0, tdb_null,
2919 mem_ctx, &outdata, &res, &timeout, NULL);
2920 if (ret != 0 || res != 0) {
2921 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get ifaces "
2922 "failed ret:%d res:%d\n",
2923 ret, res));
2924 return -1;
2927 len = offsetof(struct ctdb_control_get_ifaces, ifaces);
2928 if (len > outdata.dsize) {
2929 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get ifaces "
2930 "returned invalid data with size %u > %u\n",
2931 (unsigned int)outdata.dsize,
2932 (unsigned int)len));
2933 dump_data(DEBUG_DEBUG, outdata.dptr, outdata.dsize);
2934 return -1;
2937 ifaces = (struct ctdb_control_get_ifaces *)outdata.dptr;
2938 len += ifaces->num*sizeof(struct ctdb_control_iface_info);
2940 if (len > outdata.dsize) {
2941 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get ifaces "
2942 "returned invalid data with size %u > %u\n",
2943 (unsigned int)outdata.dsize,
2944 (unsigned int)len));
2945 dump_data(DEBUG_DEBUG, outdata.dptr, outdata.dsize);
2946 return -1;
2949 /* make sure we null terminate the returned strings */
2950 for (i=0; i < ifaces->num; i++) {
2951 ifaces->ifaces[i].name[CTDB_IFACE_SIZE] = '\0';
2954 *_ifaces = (struct ctdb_control_get_ifaces *)talloc_memdup(mem_ctx,
2955 outdata.dptr,
2956 outdata.dsize);
2957 talloc_free(outdata.dptr);
2958 if (*_ifaces == NULL) {
2959 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get ifaces "
2960 "talloc_memdup size %u failed\n",
2961 (unsigned int)outdata.dsize));
2962 return -1;
2965 return 0;
2968 int ctdb_ctrl_set_iface_link(struct ctdb_context *ctdb,
2969 struct timeval timeout, uint32_t destnode,
2970 TALLOC_CTX *mem_ctx,
2971 const struct ctdb_control_iface_info *info)
2973 int ret;
2974 TDB_DATA indata;
2975 int32_t res;
2977 indata.dptr = discard_const_p(uint8_t, info);
2978 indata.dsize = sizeof(*info);
2980 ret = ctdb_control(ctdb, destnode, 0,
2981 CTDB_CONTROL_SET_IFACE_LINK_STATE, 0, indata,
2982 mem_ctx, NULL, &res, &timeout, NULL);
2983 if (ret != 0 || res != 0) {
2984 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set iface link "
2985 "failed ret:%d res:%d\n",
2986 ret, res));
2987 return -1;
2990 return 0;
2994 set/clear the permanent disabled bit on a remote node
2996 int ctdb_ctrl_modflags(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode,
2997 uint32_t set, uint32_t clear)
2999 int ret;
3000 TDB_DATA data;
3001 struct ctdb_node_map *nodemap=NULL;
3002 struct ctdb_node_flag_change c;
3003 TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
3004 uint32_t recmaster;
3005 uint32_t *nodes;
3008 /* find the recovery master */
3009 ret = ctdb_ctrl_getrecmaster(ctdb, tmp_ctx, timeout, CTDB_CURRENT_NODE, &recmaster);
3010 if (ret != 0) {
3011 DEBUG(DEBUG_ERR, (__location__ " Unable to get recmaster from local node\n"));
3012 talloc_free(tmp_ctx);
3013 return ret;
3017 /* read the node flags from the recmaster */
3018 ret = ctdb_ctrl_getnodemap(ctdb, timeout, recmaster, tmp_ctx, &nodemap);
3019 if (ret != 0) {
3020 DEBUG(DEBUG_ERR, (__location__ " Unable to get nodemap from node %u\n", destnode));
3021 talloc_free(tmp_ctx);
3022 return -1;
3024 if (destnode >= nodemap->num) {
3025 DEBUG(DEBUG_ERR,(__location__ " Nodemap from recmaster does not contain node %d\n", destnode));
3026 talloc_free(tmp_ctx);
3027 return -1;
3030 c.pnn = destnode;
3031 c.old_flags = nodemap->nodes[destnode].flags;
3032 c.new_flags = c.old_flags;
3033 c.new_flags |= set;
3034 c.new_flags &= ~clear;
3036 data.dsize = sizeof(c);
3037 data.dptr = (unsigned char *)&c;
3039 /* send the flags update to all connected nodes */
3040 nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
3042 if (ctdb_client_async_control(ctdb, CTDB_CONTROL_MODIFY_FLAGS,
3043 nodes, 0,
3044 timeout, false, data,
3045 NULL, NULL,
3046 NULL) != 0) {
3047 DEBUG(DEBUG_ERR, (__location__ " Unable to update nodeflags on remote nodes\n"));
3049 talloc_free(tmp_ctx);
3050 return -1;
3053 talloc_free(tmp_ctx);
3054 return 0;
3059 get all tunables
3061 int ctdb_ctrl_get_all_tunables(struct ctdb_context *ctdb,
3062 struct timeval timeout,
3063 uint32_t destnode,
3064 struct ctdb_tunable *tunables)
3066 TDB_DATA outdata;
3067 int ret;
3068 int32_t res;
3070 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_GET_ALL_TUNABLES, 0, tdb_null, ctdb,
3071 &outdata, &res, &timeout, NULL);
3072 if (ret != 0 || res != 0) {
3073 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get all tunables failed\n"));
3074 return -1;
3077 if (outdata.dsize != sizeof(*tunables)) {
3078 DEBUG(DEBUG_ERR,(__location__ " bad data size %u in ctdb_ctrl_get_all_tunables should be %u\n",
3079 (unsigned)outdata.dsize, (unsigned)sizeof(*tunables)));
3080 return -1;
3083 *tunables = *(struct ctdb_tunable *)outdata.dptr;
3084 talloc_free(outdata.dptr);
3085 return 0;
3089 add a public address to a node
3091 int ctdb_ctrl_add_public_ip(struct ctdb_context *ctdb,
3092 struct timeval timeout,
3093 uint32_t destnode,
3094 struct ctdb_control_ip_iface *pub)
3096 TDB_DATA data;
3097 int32_t res;
3098 int ret;
3100 data.dsize = offsetof(struct ctdb_control_ip_iface, iface) + pub->len;
3101 data.dptr = (unsigned char *)pub;
3103 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_ADD_PUBLIC_IP, 0, data, NULL,
3104 NULL, &res, &timeout, NULL);
3105 if (ret != 0 || res != 0) {
3106 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for add_public_ip failed\n"));
3107 return -1;
3110 return 0;
3114 delete a public address from a node
3116 int ctdb_ctrl_del_public_ip(struct ctdb_context *ctdb,
3117 struct timeval timeout,
3118 uint32_t destnode,
3119 struct ctdb_control_ip_iface *pub)
3121 TDB_DATA data;
3122 int32_t res;
3123 int ret;
3125 data.dsize = offsetof(struct ctdb_control_ip_iface, iface) + pub->len;
3126 data.dptr = (unsigned char *)pub;
3128 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_DEL_PUBLIC_IP, 0, data, NULL,
3129 NULL, &res, &timeout, NULL);
3130 if (ret != 0 || res != 0) {
3131 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for del_public_ip failed\n"));
3132 return -1;
3135 return 0;
3139 kill a tcp connection
3141 int ctdb_ctrl_killtcp(struct ctdb_context *ctdb,
3142 struct timeval timeout,
3143 uint32_t destnode,
3144 struct ctdb_control_killtcp *killtcp)
3146 TDB_DATA data;
3147 int32_t res;
3148 int ret;
3150 data.dsize = sizeof(struct ctdb_control_killtcp);
3151 data.dptr = (unsigned char *)killtcp;
3153 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_KILL_TCP, 0, data, NULL,
3154 NULL, &res, &timeout, NULL);
3155 if (ret != 0 || res != 0) {
3156 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for killtcp failed\n"));
3157 return -1;
3160 return 0;
3164 send a gratious arp
3166 int ctdb_ctrl_gratious_arp(struct ctdb_context *ctdb,
3167 struct timeval timeout,
3168 uint32_t destnode,
3169 ctdb_sock_addr *addr,
3170 const char *ifname)
3172 TDB_DATA data;
3173 int32_t res;
3174 int ret, len;
3175 struct ctdb_control_gratious_arp *gratious_arp;
3176 TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
3179 len = strlen(ifname)+1;
3180 gratious_arp = talloc_size(tmp_ctx,
3181 offsetof(struct ctdb_control_gratious_arp, iface) + len);
3182 CTDB_NO_MEMORY(ctdb, gratious_arp);
3184 gratious_arp->addr = *addr;
3185 gratious_arp->len = len;
3186 memcpy(&gratious_arp->iface[0], ifname, len);
3189 data.dsize = offsetof(struct ctdb_control_gratious_arp, iface) + len;
3190 data.dptr = (unsigned char *)gratious_arp;
3192 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_SEND_GRATIOUS_ARP, 0, data, NULL,
3193 NULL, &res, &timeout, NULL);
3194 if (ret != 0 || res != 0) {
3195 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for gratious_arp failed\n"));
3196 talloc_free(tmp_ctx);
3197 return -1;
3200 talloc_free(tmp_ctx);
3201 return 0;
3205 get a list of all tcp tickles that a node knows about for a particular vnn
3207 int ctdb_ctrl_get_tcp_tickles(struct ctdb_context *ctdb,
3208 struct timeval timeout, uint32_t destnode,
3209 TALLOC_CTX *mem_ctx,
3210 ctdb_sock_addr *addr,
3211 struct ctdb_control_tcp_tickle_list **list)
3213 int ret;
3214 TDB_DATA data, outdata;
3215 int32_t status;
3217 data.dptr = (uint8_t*)addr;
3218 data.dsize = sizeof(ctdb_sock_addr);
3220 ret = ctdb_control(ctdb, destnode, 0,
3221 CTDB_CONTROL_GET_TCP_TICKLE_LIST, 0, data,
3222 mem_ctx, &outdata, &status, NULL, NULL);
3223 if (ret != 0 || status != 0) {
3224 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get tcp tickles failed\n"));
3225 return -1;
3228 *list = (struct ctdb_control_tcp_tickle_list *)outdata.dptr;
3230 return status;
3234 register a server id
3236 int ctdb_ctrl_register_server_id(struct ctdb_context *ctdb,
3237 struct timeval timeout,
3238 struct ctdb_server_id *id)
3240 TDB_DATA data;
3241 int32_t res;
3242 int ret;
3244 data.dsize = sizeof(struct ctdb_server_id);
3245 data.dptr = (unsigned char *)id;
3247 ret = ctdb_control(ctdb, CTDB_CURRENT_NODE, 0,
3248 CTDB_CONTROL_REGISTER_SERVER_ID,
3249 0, data, NULL,
3250 NULL, &res, &timeout, NULL);
3251 if (ret != 0 || res != 0) {
3252 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for register server id failed\n"));
3253 return -1;
3256 return 0;
3260 unregister a server id
3262 int ctdb_ctrl_unregister_server_id(struct ctdb_context *ctdb,
3263 struct timeval timeout,
3264 struct ctdb_server_id *id)
3266 TDB_DATA data;
3267 int32_t res;
3268 int ret;
3270 data.dsize = sizeof(struct ctdb_server_id);
3271 data.dptr = (unsigned char *)id;
3273 ret = ctdb_control(ctdb, CTDB_CURRENT_NODE, 0,
3274 CTDB_CONTROL_UNREGISTER_SERVER_ID,
3275 0, data, NULL,
3276 NULL, &res, &timeout, NULL);
3277 if (ret != 0 || res != 0) {
3278 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for unregister server id failed\n"));
3279 return -1;
3282 return 0;
3287 check if a server id exists
3289 if a server id does exist, return *status == 1, otherwise *status == 0
3291 int ctdb_ctrl_check_server_id(struct ctdb_context *ctdb,
3292 struct timeval timeout,
3293 uint32_t destnode,
3294 struct ctdb_server_id *id,
3295 uint32_t *status)
3297 TDB_DATA data;
3298 int32_t res;
3299 int ret;
3301 data.dsize = sizeof(struct ctdb_server_id);
3302 data.dptr = (unsigned char *)id;
3304 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_CHECK_SERVER_ID,
3305 0, data, NULL,
3306 NULL, &res, &timeout, NULL);
3307 if (ret != 0) {
3308 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for check server id failed\n"));
3309 return -1;
3312 if (res) {
3313 *status = 1;
3314 } else {
3315 *status = 0;
3318 return 0;
3322 get the list of server ids that are registered on a node
3324 int ctdb_ctrl_get_server_id_list(struct ctdb_context *ctdb,
3325 TALLOC_CTX *mem_ctx,
3326 struct timeval timeout, uint32_t destnode,
3327 struct ctdb_server_id_list **svid_list)
3329 int ret;
3330 TDB_DATA outdata;
3331 int32_t res;
3333 ret = ctdb_control(ctdb, destnode, 0,
3334 CTDB_CONTROL_GET_SERVER_ID_LIST, 0, tdb_null,
3335 mem_ctx, &outdata, &res, &timeout, NULL);
3336 if (ret != 0 || res != 0) {
3337 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get_server_id_list failed\n"));
3338 return -1;
3341 *svid_list = (struct ctdb_server_id_list *)talloc_steal(mem_ctx, outdata.dptr);
3343 return 0;
3347 initialise the ctdb daemon for client applications
3349 NOTE: In current code the daemon does not fork. This is for testing purposes only
3350 and to simplify the code.
3352 struct ctdb_context *ctdb_init(struct event_context *ev)
3354 int ret;
3355 struct ctdb_context *ctdb;
3357 ctdb = talloc_zero(ev, struct ctdb_context);
3358 if (ctdb == NULL) {
3359 DEBUG(DEBUG_ERR,(__location__ " talloc_zero failed.\n"));
3360 return NULL;
3362 ctdb->ev = ev;
3363 ctdb->idr = idr_init(ctdb);
3364 /* Wrap early to exercise code. */
3365 ctdb->lastid = INT_MAX-200;
3366 CTDB_NO_MEMORY_NULL(ctdb, ctdb->idr);
3368 ret = ctdb_set_socketname(ctdb, CTDB_PATH);
3369 if (ret != 0) {
3370 DEBUG(DEBUG_ERR,(__location__ " ctdb_set_socketname failed.\n"));
3371 talloc_free(ctdb);
3372 return NULL;
3375 ctdb->statistics.statistics_start_time = timeval_current();
3377 return ctdb;
3382 set some ctdb flags
3384 void ctdb_set_flags(struct ctdb_context *ctdb, unsigned flags)
3386 ctdb->flags |= flags;
3390 setup the local socket name
3392 int ctdb_set_socketname(struct ctdb_context *ctdb, const char *socketname)
3394 ctdb->daemon.name = talloc_strdup(ctdb, socketname);
3395 CTDB_NO_MEMORY(ctdb, ctdb->daemon.name);
3397 return 0;
3400 const char *ctdb_get_socketname(struct ctdb_context *ctdb)
3402 return ctdb->daemon.name;
3406 return the pnn of this node
3408 uint32_t ctdb_get_pnn(struct ctdb_context *ctdb)
3410 return ctdb->pnn;
3415 get the uptime of a remote node
3417 struct ctdb_client_control_state *
3418 ctdb_ctrl_uptime_send(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct timeval timeout, uint32_t destnode)
3420 return ctdb_control_send(ctdb, destnode, 0,
3421 CTDB_CONTROL_UPTIME, 0, tdb_null,
3422 mem_ctx, &timeout, NULL);
3425 int ctdb_ctrl_uptime_recv(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct ctdb_client_control_state *state, struct ctdb_uptime **uptime)
3427 int ret;
3428 int32_t res;
3429 TDB_DATA outdata;
3431 ret = ctdb_control_recv(ctdb, state, mem_ctx, &outdata, &res, NULL);
3432 if (ret != 0 || res != 0) {
3433 DEBUG(DEBUG_ERR,(__location__ " ctdb_ctrl_uptime_recv failed\n"));
3434 return -1;
3437 *uptime = (struct ctdb_uptime *)talloc_steal(mem_ctx, outdata.dptr);
3439 return 0;
3442 int ctdb_ctrl_uptime(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct timeval timeout, uint32_t destnode, struct ctdb_uptime **uptime)
3444 struct ctdb_client_control_state *state;
3446 state = ctdb_ctrl_uptime_send(ctdb, mem_ctx, timeout, destnode);
3447 return ctdb_ctrl_uptime_recv(ctdb, mem_ctx, state, uptime);
3451 send a control to execute the "recovered" event script on a node
3453 int ctdb_ctrl_end_recovery(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode)
3455 int ret;
3456 int32_t status;
3458 ret = ctdb_control(ctdb, destnode, 0,
3459 CTDB_CONTROL_END_RECOVERY, 0, tdb_null,
3460 NULL, NULL, &status, &timeout, NULL);
3461 if (ret != 0 || status != 0) {
3462 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for end_recovery failed\n"));
3463 return -1;
3466 return 0;
3470 callback for the async helpers used when sending the same control
3471 to multiple nodes in parallell.
3473 static void async_callback(struct ctdb_client_control_state *state)
3475 struct client_async_data *data = talloc_get_type(state->async.private_data, struct client_async_data);
3476 struct ctdb_context *ctdb = talloc_get_type(state->ctdb, struct ctdb_context);
3477 int ret;
3478 TDB_DATA outdata;
3479 int32_t res = -1;
3480 uint32_t destnode = state->c->hdr.destnode;
3482 outdata.dsize = 0;
3483 outdata.dptr = NULL;
3485 /* one more node has responded with recmode data */
3486 data->count--;
3488 /* if we failed to push the db, then return an error and let
3489 the main loop try again.
3491 if (state->state != CTDB_CONTROL_DONE) {
3492 if ( !data->dont_log_errors) {
3493 DEBUG(DEBUG_ERR,("Async operation failed with state %d, opcode:%u\n", state->state, data->opcode));
3495 data->fail_count++;
3496 if (state->state == CTDB_CONTROL_TIMEOUT) {
3497 res = -ETIME;
3498 } else {
3499 res = -1;
3501 if (data->fail_callback) {
3502 data->fail_callback(ctdb, destnode, res, outdata,
3503 data->callback_data);
3505 return;
3508 state->async.fn = NULL;
3510 ret = ctdb_control_recv(ctdb, state, data, &outdata, &res, NULL);
3511 if ((ret != 0) || (res != 0)) {
3512 if ( !data->dont_log_errors) {
3513 DEBUG(DEBUG_ERR,("Async operation failed with ret=%d res=%d opcode=%u\n", ret, (int)res, data->opcode));
3515 data->fail_count++;
3516 if (data->fail_callback) {
3517 data->fail_callback(ctdb, destnode, res, outdata,
3518 data->callback_data);
3521 if ((ret == 0) && (data->callback != NULL)) {
3522 data->callback(ctdb, destnode, res, outdata,
3523 data->callback_data);
3528 void ctdb_client_async_add(struct client_async_data *data, struct ctdb_client_control_state *state)
3530 /* set up the callback functions */
3531 state->async.fn = async_callback;
3532 state->async.private_data = data;
3534 /* one more control to wait for to complete */
3535 data->count++;
3539 /* wait for up to the maximum number of seconds allowed
3540 or until all nodes we expect a response from has replied
3542 int ctdb_client_async_wait(struct ctdb_context *ctdb, struct client_async_data *data)
3544 while (data->count > 0) {
3545 event_loop_once(ctdb->ev);
3547 if (data->fail_count != 0) {
3548 if (!data->dont_log_errors) {
3549 DEBUG(DEBUG_ERR,("Async wait failed - fail_count=%u\n",
3550 data->fail_count));
3552 return -1;
3554 return 0;
3559 perform a simple control on the listed nodes
3560 The control cannot return data
3562 int ctdb_client_async_control(struct ctdb_context *ctdb,
3563 enum ctdb_controls opcode,
3564 uint32_t *nodes,
3565 uint64_t srvid,
3566 struct timeval timeout,
3567 bool dont_log_errors,
3568 TDB_DATA data,
3569 client_async_callback client_callback,
3570 client_async_callback fail_callback,
3571 void *callback_data)
3573 struct client_async_data *async_data;
3574 struct ctdb_client_control_state *state;
3575 int j, num_nodes;
3577 async_data = talloc_zero(ctdb, struct client_async_data);
3578 CTDB_NO_MEMORY_FATAL(ctdb, async_data);
3579 async_data->dont_log_errors = dont_log_errors;
3580 async_data->callback = client_callback;
3581 async_data->fail_callback = fail_callback;
3582 async_data->callback_data = callback_data;
3583 async_data->opcode = opcode;
3585 num_nodes = talloc_get_size(nodes) / sizeof(uint32_t);
3587 /* loop over all nodes and send an async control to each of them */
3588 for (j=0; j<num_nodes; j++) {
3589 uint32_t pnn = nodes[j];
3591 state = ctdb_control_send(ctdb, pnn, srvid, opcode,
3592 0, data, async_data, &timeout, NULL);
3593 if (state == NULL) {
3594 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control %u\n", (unsigned)opcode));
3595 talloc_free(async_data);
3596 return -1;
3599 ctdb_client_async_add(async_data, state);
3602 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
3603 talloc_free(async_data);
3604 return -1;
3607 talloc_free(async_data);
3608 return 0;
3611 uint32_t *list_of_vnnmap_nodes(struct ctdb_context *ctdb,
3612 struct ctdb_vnn_map *vnn_map,
3613 TALLOC_CTX *mem_ctx,
3614 bool include_self)
3616 int i, j, num_nodes;
3617 uint32_t *nodes;
3619 for (i=num_nodes=0;i<vnn_map->size;i++) {
3620 if (vnn_map->map[i] == ctdb->pnn && !include_self) {
3621 continue;
3623 num_nodes++;
3626 nodes = talloc_array(mem_ctx, uint32_t, num_nodes);
3627 CTDB_NO_MEMORY_FATAL(ctdb, nodes);
3629 for (i=j=0;i<vnn_map->size;i++) {
3630 if (vnn_map->map[i] == ctdb->pnn && !include_self) {
3631 continue;
3633 nodes[j++] = vnn_map->map[i];
3636 return nodes;
3639 /* Get list of nodes not including those with flags specified by mask.
3640 * If exclude_pnn is not -1 then exclude that pnn from the list.
3642 uint32_t *list_of_nodes(struct ctdb_context *ctdb,
3643 struct ctdb_node_map *node_map,
3644 TALLOC_CTX *mem_ctx,
3645 uint32_t mask,
3646 int exclude_pnn)
3648 int i, j, num_nodes;
3649 uint32_t *nodes;
3651 for (i=num_nodes=0;i<node_map->num;i++) {
3652 if (node_map->nodes[i].flags & mask) {
3653 continue;
3655 if (node_map->nodes[i].pnn == exclude_pnn) {
3656 continue;
3658 num_nodes++;
3661 nodes = talloc_array(mem_ctx, uint32_t, num_nodes);
3662 CTDB_NO_MEMORY_FATAL(ctdb, nodes);
3664 for (i=j=0;i<node_map->num;i++) {
3665 if (node_map->nodes[i].flags & mask) {
3666 continue;
3668 if (node_map->nodes[i].pnn == exclude_pnn) {
3669 continue;
3671 nodes[j++] = node_map->nodes[i].pnn;
3674 return nodes;
3677 uint32_t *list_of_active_nodes(struct ctdb_context *ctdb,
3678 struct ctdb_node_map *node_map,
3679 TALLOC_CTX *mem_ctx,
3680 bool include_self)
3682 return list_of_nodes(ctdb, node_map, mem_ctx, NODE_FLAGS_INACTIVE,
3683 include_self ? -1 : ctdb->pnn);
3686 uint32_t *list_of_connected_nodes(struct ctdb_context *ctdb,
3687 struct ctdb_node_map *node_map,
3688 TALLOC_CTX *mem_ctx,
3689 bool include_self)
3691 return list_of_nodes(ctdb, node_map, mem_ctx, NODE_FLAGS_DISCONNECTED,
3692 include_self ? -1 : ctdb->pnn);
3696 this is used to test if a pnn lock exists and if it exists will return
3697 the number of connections that pnn has reported or -1 if that recovery
3698 daemon is not running.
3701 ctdb_read_pnn_lock(int fd, int32_t pnn)
3703 struct flock lock;
3704 char c;
3706 lock.l_type = F_WRLCK;
3707 lock.l_whence = SEEK_SET;
3708 lock.l_start = pnn;
3709 lock.l_len = 1;
3710 lock.l_pid = 0;
3712 if (fcntl(fd, F_GETLK, &lock) != 0) {
3713 DEBUG(DEBUG_ERR, (__location__ " F_GETLK failed with %s\n", strerror(errno)));
3714 return -1;
3717 if (lock.l_type == F_UNLCK) {
3718 return -1;
3721 if (pread(fd, &c, 1, pnn) == -1) {
3722 DEBUG(DEBUG_CRIT,(__location__ " failed read pnn count - %s\n", strerror(errno)));
3723 return -1;
3726 return c;
3730 get capabilities of a remote node
3732 struct ctdb_client_control_state *
3733 ctdb_ctrl_getcapabilities_send(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct timeval timeout, uint32_t destnode)
3735 return ctdb_control_send(ctdb, destnode, 0,
3736 CTDB_CONTROL_GET_CAPABILITIES, 0, tdb_null,
3737 mem_ctx, &timeout, NULL);
3740 int ctdb_ctrl_getcapabilities_recv(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct ctdb_client_control_state *state, uint32_t *capabilities)
3742 int ret;
3743 int32_t res;
3744 TDB_DATA outdata;
3746 ret = ctdb_control_recv(ctdb, state, mem_ctx, &outdata, &res, NULL);
3747 if ( (ret != 0) || (res != 0) ) {
3748 DEBUG(DEBUG_ERR,(__location__ " ctdb_ctrl_getcapabilities_recv failed\n"));
3749 return -1;
3752 if (capabilities) {
3753 *capabilities = *((uint32_t *)outdata.dptr);
3756 return 0;
3759 int ctdb_ctrl_getcapabilities(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t *capabilities)
3761 struct ctdb_client_control_state *state;
3762 TALLOC_CTX *tmp_ctx = talloc_new(NULL);
3763 int ret;
3765 state = ctdb_ctrl_getcapabilities_send(ctdb, tmp_ctx, timeout, destnode);
3766 ret = ctdb_ctrl_getcapabilities_recv(ctdb, tmp_ctx, state, capabilities);
3767 talloc_free(tmp_ctx);
3768 return ret;
/*
  Identity of a lock holder as stored in g_lock records.
  Filled in by server_id_get(); compared field by field by
  server_id_equal().
 */
struct server_id {
	/* process id of the holder (getpid()) */
	uint64_t pid;
	/* request id passed in by the caller */
	uint32_t task_id;
	/* pnn of the node the holder runs on */
	uint32_t vnn;
	/* (vnn << 32) | task_id */
	uint64_t unique_id;
};
3778 static struct server_id server_id_get(struct ctdb_context *ctdb, uint32_t reqid)
3780 struct server_id id;
3782 id.pid = getpid();
3783 id.task_id = reqid;
3784 id.vnn = ctdb_get_pnn(ctdb);
3785 id.unique_id = id.vnn;
3786 id.unique_id = (id.unique_id << 32) | reqid;
3788 return id;
3791 static bool server_id_equal(struct server_id *id1, struct server_id *id2)
3793 if (id1->pid != id2->pid) {
3794 return false;
3797 if (id1->task_id != id2->task_id) {
3798 return false;
3801 if (id1->vnn != id2->vnn) {
3802 return false;
3805 if (id1->unique_id != id2->unique_id) {
3806 return false;
3809 return true;
3812 static bool server_id_exists(struct ctdb_context *ctdb, struct server_id *id)
3814 struct ctdb_server_id sid;
3815 int ret;
3816 uint32_t result;
3818 sid.type = SERVER_TYPE_SAMBA;
3819 sid.pnn = id->vnn;
3820 sid.server_id = id->pid;
3822 ret = ctdb_ctrl_check_server_id(ctdb, timeval_current_ofs(3,0),
3823 id->vnn, &sid, &result);
3824 if (ret != 0) {
3825 /* If control times out, assume server_id exists. */
3826 return true;
3829 if (result) {
3830 return true;
3833 return false;
/*
  Kind of lock stored in a g_lock record.  Within this part of the file
  only G_LOCK_WRITE is ever written (see g_lock_lock()).
 */
enum g_lock_type {
	G_LOCK_READ = 0,
	G_LOCK_WRITE = 1,
};
3842 struct g_lock_rec {
3843 enum g_lock_type type;
3844 struct server_id id;
/*
  Parsed form of a g_lock record: the number of entries and the entry
  array (NULL when the record is empty).
 */
struct g_lock_recs {
	unsigned int num;
	struct g_lock_rec *lock;
};
3852 static bool g_lock_parse(TALLOC_CTX *mem_ctx, TDB_DATA data,
3853 struct g_lock_recs **locks)
3855 struct g_lock_recs *recs;
3857 recs = talloc_zero(mem_ctx, struct g_lock_recs);
3858 if (recs == NULL) {
3859 return false;
3862 if (data.dsize == 0) {
3863 goto done;
3866 if (data.dsize % sizeof(struct g_lock_rec) != 0) {
3867 DEBUG(DEBUG_ERR, (__location__ "invalid data size %lu in g_lock record\n",
3868 (unsigned long)data.dsize));
3869 talloc_free(recs);
3870 return false;
3873 recs->num = data.dsize / sizeof(struct g_lock_rec);
3874 recs->lock = talloc_memdup(mem_ctx, data.dptr, data.dsize);
3875 if (recs->lock == NULL) {
3876 talloc_free(recs);
3877 return false;
3880 done:
3881 if (locks != NULL) {
3882 *locks = recs;
3885 return true;
3889 static bool g_lock_lock(TALLOC_CTX *mem_ctx,
3890 struct ctdb_db_context *ctdb_db,
3891 const char *keyname, uint32_t reqid)
3893 TDB_DATA key, data;
3894 struct ctdb_record_handle *h;
3895 struct g_lock_recs *locks;
3896 struct server_id id;
3897 struct timeval t_start;
3898 int i;
3900 key.dptr = (uint8_t *)discard_const(keyname);
3901 key.dsize = strlen(keyname) + 1;
3903 t_start = timeval_current();
3905 again:
3906 /* Keep trying for an hour. */
3907 if (timeval_elapsed(&t_start) > 3600) {
3908 return false;
3911 h = ctdb_fetch_lock(ctdb_db, mem_ctx, key, &data);
3912 if (h == NULL) {
3913 return false;
3916 if (!g_lock_parse(h, data, &locks)) {
3917 DEBUG(DEBUG_ERR, ("g_lock: error parsing locks\n"));
3918 talloc_free(data.dptr);
3919 talloc_free(h);
3920 return false;
3923 talloc_free(data.dptr);
3925 id = server_id_get(ctdb_db->ctdb, reqid);
3927 i = 0;
3928 while (i < locks->num) {
3929 if (server_id_equal(&locks->lock[i].id, &id)) {
3930 /* Internal error */
3931 talloc_free(h);
3932 return false;
3935 if (!server_id_exists(ctdb_db->ctdb, &locks->lock[i].id)) {
3936 if (i < locks->num-1) {
3937 locks->lock[i] = locks->lock[locks->num-1];
3939 locks->num--;
3940 continue;
3943 /* This entry is locked. */
3944 DEBUG(DEBUG_INFO, ("g_lock: lock already granted for "
3945 "pid=0x%llx taskid=%x vnn=%d id=0x%llx\n",
3946 (unsigned long long)id.pid,
3947 id.task_id, id.vnn,
3948 (unsigned long long)id.unique_id));
3949 talloc_free(h);
3950 goto again;
3953 locks->lock = talloc_realloc(locks, locks->lock, struct g_lock_rec,
3954 locks->num+1);
3955 if (locks->lock == NULL) {
3956 talloc_free(h);
3957 return false;
3960 locks->lock[locks->num].type = G_LOCK_WRITE;
3961 locks->lock[locks->num].id = id;
3962 locks->num++;
3964 data.dptr = (uint8_t *)locks->lock;
3965 data.dsize = locks->num * sizeof(struct g_lock_rec);
3967 if (ctdb_record_store(h, data) != 0) {
3968 DEBUG(DEBUG_ERR, ("g_lock: failed to write transaction lock for "
3969 "pid=0x%llx taskid=%x vnn=%d id=0x%llx\n",
3970 (unsigned long long)id.pid,
3971 id.task_id, id.vnn,
3972 (unsigned long long)id.unique_id));
3973 talloc_free(h);
3974 return false;
3977 DEBUG(DEBUG_INFO, ("g_lock: lock granted for "
3978 "pid=0x%llx taskid=%x vnn=%d id=0x%llx\n",
3979 (unsigned long long)id.pid,
3980 id.task_id, id.vnn,
3981 (unsigned long long)id.unique_id));
3983 talloc_free(h);
3984 return true;
3987 static bool g_lock_unlock(TALLOC_CTX *mem_ctx,
3988 struct ctdb_db_context *ctdb_db,
3989 const char *keyname, uint32_t reqid)
3991 TDB_DATA key, data;
3992 struct ctdb_record_handle *h;
3993 struct g_lock_recs *locks;
3994 struct server_id id;
3995 int i;
3996 bool found = false;
3998 key.dptr = (uint8_t *)discard_const(keyname);
3999 key.dsize = strlen(keyname) + 1;
4000 h = ctdb_fetch_lock(ctdb_db, mem_ctx, key, &data);
4001 if (h == NULL) {
4002 return false;
4005 if (!g_lock_parse(h, data, &locks)) {
4006 DEBUG(DEBUG_ERR, ("g_lock: error parsing locks\n"));
4007 talloc_free(data.dptr);
4008 talloc_free(h);
4009 return false;
4012 talloc_free(data.dptr);
4014 id = server_id_get(ctdb_db->ctdb, reqid);
4016 for (i=0; i<locks->num; i++) {
4017 if (server_id_equal(&locks->lock[i].id, &id)) {
4018 if (i < locks->num-1) {
4019 locks->lock[i] = locks->lock[locks->num-1];
4021 locks->num--;
4022 found = true;
4023 break;
4027 if (!found) {
4028 DEBUG(DEBUG_ERR, ("g_lock: lock not found\n"));
4029 talloc_free(h);
4030 return false;
4033 data.dptr = (uint8_t *)locks->lock;
4034 data.dsize = locks->num * sizeof(struct g_lock_rec);
4036 if (ctdb_record_store(h, data) != 0) {
4037 talloc_free(h);
4038 return false;
4041 talloc_free(h);
4042 return true;
/*
  State of one client-side transaction, created by
  ctdb_transaction_start().
 */
struct ctdb_transaction_handle {
	/* database the transaction operates on */
	struct ctdb_db_context *ctdb_db;
	/* g_lock.tdb, where the transaction lock record is kept */
	struct ctdb_db_context *g_lock_db;
	/* key of the lock record, "transaction_db_0x<db_id>" */
	char *lock_name;
	/* request id obtained from ctdb_reqid_new() */
	uint32_t reqid;
	/*
	 * we store reads and writes done under a transaction:
	 * - one list stores both reads and writes (m_all)
	 * - the other just writes (m_write)
	 */
	struct ctdb_marshall_buffer *m_all;
	struct ctdb_marshall_buffer *m_write;
};
4060 static int ctdb_transaction_destructor(struct ctdb_transaction_handle *h)
4062 g_lock_unlock(h, h->g_lock_db, h->lock_name, h->reqid);
4063 ctdb_reqid_remove(h->ctdb_db->ctdb, h->reqid);
4064 return 0;
4069 * start a transaction on a database
4071 struct ctdb_transaction_handle *ctdb_transaction_start(struct ctdb_db_context *ctdb_db,
4072 TALLOC_CTX *mem_ctx)
4074 struct ctdb_transaction_handle *h;
4075 struct ctdb_server_id id;
4077 h = talloc_zero(mem_ctx, struct ctdb_transaction_handle);
4078 if (h == NULL) {
4079 DEBUG(DEBUG_ERR, (__location__ " memory allocation error\n"));
4080 return NULL;
4083 h->ctdb_db = ctdb_db;
4084 h->lock_name = talloc_asprintf(h, "transaction_db_0x%08x",
4085 (unsigned int)ctdb_db->db_id);
4086 if (h->lock_name == NULL) {
4087 DEBUG(DEBUG_ERR, (__location__ " talloc asprintf failed\n"));
4088 talloc_free(h);
4089 return NULL;
4092 h->g_lock_db = ctdb_attach(h->ctdb_db->ctdb, timeval_current_ofs(3,0),
4093 "g_lock.tdb", false, 0);
4094 if (!h->g_lock_db) {
4095 DEBUG(DEBUG_ERR, (__location__ " unable to attach to g_lock.tdb\n"));
4096 talloc_free(h);
4097 return NULL;
4100 id.type = SERVER_TYPE_SAMBA;
4101 id.pnn = ctdb_get_pnn(ctdb_db->ctdb);
4102 id.server_id = getpid();
4104 if (ctdb_ctrl_register_server_id(ctdb_db->ctdb, timeval_current_ofs(3,0),
4105 &id) != 0) {
4106 DEBUG(DEBUG_ERR, (__location__ " unable to register server id\n"));
4107 talloc_free(h);
4108 return NULL;
4111 h->reqid = ctdb_reqid_new(h->ctdb_db->ctdb, h);
4113 if (!g_lock_lock(h, h->g_lock_db, h->lock_name, h->reqid)) {
4114 DEBUG(DEBUG_ERR, (__location__ " Error locking g_lock.tdb\n"));
4115 talloc_free(h);
4116 return NULL;
4119 talloc_set_destructor(h, ctdb_transaction_destructor);
4120 return h;
4124 * fetch a record inside a transaction
4126 int ctdb_transaction_fetch(struct ctdb_transaction_handle *h,
4127 TALLOC_CTX *mem_ctx,
4128 TDB_DATA key, TDB_DATA *data)
4130 struct ctdb_ltdb_header header;
4131 int ret;
4133 ZERO_STRUCT(header);
4135 ret = ctdb_ltdb_fetch(h->ctdb_db, key, &header, mem_ctx, data);
4136 if (ret == -1 && header.dmaster == (uint32_t)-1) {
4137 /* record doesn't exist yet */
4138 *data = tdb_null;
4139 ret = 0;
4142 if (ret != 0) {
4143 return ret;
4146 h->m_all = ctdb_marshall_add(h, h->m_all, h->ctdb_db->db_id, 1, key, NULL, *data);
4147 if (h->m_all == NULL) {
4148 DEBUG(DEBUG_ERR,(__location__ " Failed to add to marshalling record\n"));
4149 return -1;
4152 return 0;
4156 * stores a record inside a transaction
4158 int ctdb_transaction_store(struct ctdb_transaction_handle *h,
4159 TDB_DATA key, TDB_DATA data)
4161 TALLOC_CTX *tmp_ctx = talloc_new(h);
4162 struct ctdb_ltdb_header header;
4163 TDB_DATA olddata;
4164 int ret;
4166 /* we need the header so we can update the RSN */
4167 ret = ctdb_ltdb_fetch(h->ctdb_db, key, &header, tmp_ctx, &olddata);
4168 if (ret == -1 && header.dmaster == (uint32_t)-1) {
4169 /* the record doesn't exist - create one with us as dmaster.
4170 This is only safe because we are in a transaction and this
4171 is a persistent database */
4172 ZERO_STRUCT(header);
4173 } else if (ret != 0) {
4174 DEBUG(DEBUG_ERR,(__location__ " Failed to fetch record\n"));
4175 talloc_free(tmp_ctx);
4176 return ret;
4179 if (data.dsize == olddata.dsize &&
4180 memcmp(data.dptr, olddata.dptr, data.dsize) == 0 &&
4181 header.rsn != 0) {
4182 /* save writing the same data */
4183 talloc_free(tmp_ctx);
4184 return 0;
4187 header.dmaster = h->ctdb_db->ctdb->pnn;
4188 header.rsn++;
4190 h->m_all = ctdb_marshall_add(h, h->m_all, h->ctdb_db->db_id, 0, key, NULL, data);
4191 if (h->m_all == NULL) {
4192 DEBUG(DEBUG_ERR,(__location__ " Failed to add to marshalling record\n"));
4193 talloc_free(tmp_ctx);
4194 return -1;
4197 h->m_write = ctdb_marshall_add(h, h->m_write, h->ctdb_db->db_id, 0, key, &header, data);
4198 if (h->m_write == NULL) {
4199 DEBUG(DEBUG_ERR,(__location__ " Failed to add to marshalling record\n"));
4200 talloc_free(tmp_ctx);
4201 return -1;
4204 talloc_free(tmp_ctx);
4205 return 0;
4208 static int ctdb_fetch_db_seqnum(struct ctdb_db_context *ctdb_db, uint64_t *seqnum)
4210 const char *keyname = CTDB_DB_SEQNUM_KEY;
4211 TDB_DATA key, data;
4212 struct ctdb_ltdb_header header;
4213 int ret;
4215 key.dptr = (uint8_t *)discard_const(keyname);
4216 key.dsize = strlen(keyname) + 1;
4218 ret = ctdb_ltdb_fetch(ctdb_db, key, &header, ctdb_db, &data);
4219 if (ret != 0) {
4220 *seqnum = 0;
4221 return 0;
4224 if (data.dsize == 0) {
4225 *seqnum = 0;
4226 return 0;
4229 if (data.dsize != sizeof(*seqnum)) {
4230 DEBUG(DEBUG_ERR, (__location__ " Invalid data recived len=%zi\n",
4231 data.dsize));
4232 talloc_free(data.dptr);
4233 return -1;
4236 *seqnum = *(uint64_t *)data.dptr;
4237 talloc_free(data.dptr);
4239 return 0;
4243 static int ctdb_store_db_seqnum(struct ctdb_transaction_handle *h,
4244 uint64_t seqnum)
4246 const char *keyname = CTDB_DB_SEQNUM_KEY;
4247 TDB_DATA key, data;
4249 key.dptr = (uint8_t *)discard_const(keyname);
4250 key.dsize = strlen(keyname) + 1;
4252 data.dptr = (uint8_t *)&seqnum;
4253 data.dsize = sizeof(seqnum);
4255 return ctdb_transaction_store(h, key, data);
4260 * commit a transaction
4262 int ctdb_transaction_commit(struct ctdb_transaction_handle *h)
4264 int ret;
4265 uint64_t old_seqnum, new_seqnum;
4266 int32_t status;
4267 struct timeval timeout;
4269 if (h->m_write == NULL) {
4270 /* no changes were made */
4271 talloc_free(h);
4272 return 0;
4275 ret = ctdb_fetch_db_seqnum(h->ctdb_db, &old_seqnum);
4276 if (ret != 0) {
4277 DEBUG(DEBUG_ERR, (__location__ " failed to fetch db sequence number\n"));
4278 ret = -1;
4279 goto done;
4282 new_seqnum = old_seqnum + 1;
4283 ret = ctdb_store_db_seqnum(h, new_seqnum);
4284 if (ret != 0) {
4285 DEBUG(DEBUG_ERR, (__location__ " failed to store db sequence number\n"));
4286 ret = -1;
4287 goto done;
4290 again:
4291 timeout = timeval_current_ofs(3,0);
4292 ret = ctdb_control(h->ctdb_db->ctdb, CTDB_CURRENT_NODE,
4293 h->ctdb_db->db_id,
4294 CTDB_CONTROL_TRANS3_COMMIT, 0,
4295 ctdb_marshall_finish(h->m_write), NULL, NULL,
4296 &status, &timeout, NULL);
4297 if (ret != 0 || status != 0) {
4299 * TRANS3_COMMIT control will only fail if recovery has been
4300 * triggered. Check if the database has been updated or not.
4302 ret = ctdb_fetch_db_seqnum(h->ctdb_db, &new_seqnum);
4303 if (ret != 0) {
4304 DEBUG(DEBUG_ERR, (__location__ " failed to fetch db sequence number\n"));
4305 goto done;
4308 if (new_seqnum == old_seqnum) {
4309 /* Database not yet updated, try again */
4310 goto again;
4313 if (new_seqnum != (old_seqnum + 1)) {
4314 DEBUG(DEBUG_ERR, (__location__ " new seqnum [%llu] != old seqnum [%llu] + 1\n",
4315 (long long unsigned)new_seqnum,
4316 (long long unsigned)old_seqnum));
4317 ret = -1;
4318 goto done;
4322 ret = 0;
4324 done:
4325 talloc_free(h);
4326 return ret;
/*
 * cancel a transaction
 *
 * Frees the handle; nothing queued in it is sent.  Always returns 0.
 */
int ctdb_transaction_cancel(struct ctdb_transaction_handle *h)
{
	(void)talloc_free(h);

	return 0;
}
4340 recovery daemon ping to main daemon
4342 int ctdb_ctrl_recd_ping(struct ctdb_context *ctdb)
4344 int ret;
4345 int32_t res;
4347 ret = ctdb_control(ctdb, CTDB_CURRENT_NODE, 0, CTDB_CONTROL_RECD_PING, 0, tdb_null,
4348 ctdb, NULL, &res, NULL, NULL);
4349 if (ret != 0 || res != 0) {
4350 DEBUG(DEBUG_ERR,("Failed to send recd ping\n"));
4351 return -1;
4354 return 0;
4357 /* When forking the main daemon and the child process needs to connect
4358 * back to the daemon as a client process, this function can be used
4359 * to change the ctdb context from daemon into client mode. The child
4360 * process must be created using ctdb_fork() and not fork() -
4361 * ctdb_fork() does some necessary housekeeping.
4363 int switch_from_server_to_client(struct ctdb_context *ctdb, const char *fmt, ...)
4365 int ret;
4366 va_list ap;
4368 /* Add extra information so we can identify this in the logs */
4369 va_start(ap, fmt);
4370 debug_extra = talloc_strdup_append(talloc_vasprintf(NULL, fmt, ap), ":");
4371 va_end(ap);
4373 /* get a new event context */
4374 ctdb->ev = event_context_init(ctdb);
4375 tevent_loop_allow_nesting(ctdb->ev);
4377 /* Connect to main CTDB daemon */
4378 ret = ctdb_socket_connect(ctdb);
4379 if (ret != 0) {
4380 DEBUG(DEBUG_ALERT, (__location__ " Failed to init ctdb client\n"));
4381 return -1;
4384 ctdb->can_send_controls = true;
4386 return 0;
4390 get the status of running the monitor eventscripts: NULL means never run.
4392 int ctdb_ctrl_getscriptstatus(struct ctdb_context *ctdb,
4393 struct timeval timeout, uint32_t destnode,
4394 TALLOC_CTX *mem_ctx, enum ctdb_eventscript_call type,
4395 struct ctdb_scripts_wire **scripts)
4397 int ret;
4398 TDB_DATA outdata, indata;
4399 int32_t res;
4400 uint32_t uinttype = type;
4402 indata.dptr = (uint8_t *)&uinttype;
4403 indata.dsize = sizeof(uinttype);
4405 ret = ctdb_control(ctdb, destnode, 0,
4406 CTDB_CONTROL_GET_EVENT_SCRIPT_STATUS, 0, indata,
4407 mem_ctx, &outdata, &res, &timeout, NULL);
4408 if (ret != 0 || res != 0) {
4409 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getscriptstatus failed ret:%d res:%d\n", ret, res));
4410 return -1;
4413 if (outdata.dsize == 0) {
4414 *scripts = NULL;
4415 } else {
4416 *scripts = (struct ctdb_scripts_wire *)talloc_memdup(mem_ctx, outdata.dptr, outdata.dsize);
4417 talloc_free(outdata.dptr);
4420 return 0;
4424 tell the main daemon how long it took to lock the reclock file
4426 int ctdb_ctrl_report_recd_lock_latency(struct ctdb_context *ctdb, struct timeval timeout, double latency)
4428 int ret;
4429 int32_t res;
4430 TDB_DATA data;
4432 data.dptr = (uint8_t *)&latency;
4433 data.dsize = sizeof(latency);
4435 ret = ctdb_control(ctdb, CTDB_CURRENT_NODE, 0, CTDB_CONTROL_RECD_RECLOCK_LATENCY, 0, data,
4436 ctdb, NULL, &res, NULL, NULL);
4437 if (ret != 0 || res != 0) {
4438 DEBUG(DEBUG_ERR,("Failed to send recd reclock latency\n"));
4439 return -1;
4442 return 0;
4446 get the name of the reclock file
4448 int ctdb_ctrl_getreclock(struct ctdb_context *ctdb, struct timeval timeout,
4449 uint32_t destnode, TALLOC_CTX *mem_ctx,
4450 const char **name)
4452 int ret;
4453 int32_t res;
4454 TDB_DATA data;
4456 ret = ctdb_control(ctdb, destnode, 0,
4457 CTDB_CONTROL_GET_RECLOCK_FILE, 0, tdb_null,
4458 mem_ctx, &data, &res, &timeout, NULL);
4459 if (ret != 0 || res != 0) {
4460 return -1;
4463 if (data.dsize == 0) {
4464 *name = NULL;
4465 } else {
4466 *name = talloc_strdup(mem_ctx, discard_const(data.dptr));
4468 talloc_free(data.dptr);
4470 return 0;
4474 set the reclock filename for a node
4476 int ctdb_ctrl_setreclock(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, const char *reclock)
4478 int ret;
4479 TDB_DATA data;
4480 int32_t res;
4482 if (reclock == NULL) {
4483 data.dsize = 0;
4484 data.dptr = NULL;
4485 } else {
4486 data.dsize = strlen(reclock) + 1;
4487 data.dptr = discard_const(reclock);
4490 ret = ctdb_control(ctdb, destnode, 0,
4491 CTDB_CONTROL_SET_RECLOCK_FILE, 0, data,
4492 NULL, NULL, &res, &timeout, NULL);
4493 if (ret != 0 || res != 0) {
4494 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for setreclock failed\n"));
4495 return -1;
4498 return 0;
4502 stop a node
4504 int ctdb_ctrl_stop_node(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode)
4506 int ret;
4507 int32_t res;
4509 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_STOP_NODE, 0, tdb_null,
4510 ctdb, NULL, &res, &timeout, NULL);
4511 if (ret != 0 || res != 0) {
4512 DEBUG(DEBUG_ERR,("Failed to stop node\n"));
4513 return -1;
4516 return 0;
4520 continue a node
4522 int ctdb_ctrl_continue_node(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode)
4524 int ret;
4526 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_CONTINUE_NODE, 0, tdb_null,
4527 ctdb, NULL, NULL, &timeout, NULL);
4528 if (ret != 0) {
4529 DEBUG(DEBUG_ERR,("Failed to continue node\n"));
4530 return -1;
4533 return 0;
4537 set the natgw state for a node
4539 int ctdb_ctrl_setnatgwstate(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t natgwstate)
4541 int ret;
4542 TDB_DATA data;
4543 int32_t res;
4545 data.dsize = sizeof(natgwstate);
4546 data.dptr = (uint8_t *)&natgwstate;
4548 ret = ctdb_control(ctdb, destnode, 0,
4549 CTDB_CONTROL_SET_NATGWSTATE, 0, data,
4550 NULL, NULL, &res, &timeout, NULL);
4551 if (ret != 0 || res != 0) {
4552 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for setnatgwstate failed\n"));
4553 return -1;
4556 return 0;
4560 set the lmaster role for a node
4562 int ctdb_ctrl_setlmasterrole(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t lmasterrole)
4564 int ret;
4565 TDB_DATA data;
4566 int32_t res;
4568 data.dsize = sizeof(lmasterrole);
4569 data.dptr = (uint8_t *)&lmasterrole;
4571 ret = ctdb_control(ctdb, destnode, 0,
4572 CTDB_CONTROL_SET_LMASTERROLE, 0, data,
4573 NULL, NULL, &res, &timeout, NULL);
4574 if (ret != 0 || res != 0) {
4575 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for setlmasterrole failed\n"));
4576 return -1;
4579 return 0;
4583 set the recmaster role for a node
4585 int ctdb_ctrl_setrecmasterrole(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t recmasterrole)
4587 int ret;
4588 TDB_DATA data;
4589 int32_t res;
4591 data.dsize = sizeof(recmasterrole);
4592 data.dptr = (uint8_t *)&recmasterrole;
4594 ret = ctdb_control(ctdb, destnode, 0,
4595 CTDB_CONTROL_SET_RECMASTERROLE, 0, data,
4596 NULL, NULL, &res, &timeout, NULL);
4597 if (ret != 0 || res != 0) {
4598 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for setrecmasterrole failed\n"));
4599 return -1;
4602 return 0;
4605 /* enable an eventscript
4607 int ctdb_ctrl_enablescript(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, const char *script)
4609 int ret;
4610 TDB_DATA data;
4611 int32_t res;
4613 data.dsize = strlen(script) + 1;
4614 data.dptr = discard_const(script);
4616 ret = ctdb_control(ctdb, destnode, 0,
4617 CTDB_CONTROL_ENABLE_SCRIPT, 0, data,
4618 NULL, NULL, &res, &timeout, NULL);
4619 if (ret != 0 || res != 0) {
4620 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for enablescript failed\n"));
4621 return -1;
4624 return 0;
4627 /* disable an eventscript
4629 int ctdb_ctrl_disablescript(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, const char *script)
4631 int ret;
4632 TDB_DATA data;
4633 int32_t res;
4635 data.dsize = strlen(script) + 1;
4636 data.dptr = discard_const(script);
4638 ret = ctdb_control(ctdb, destnode, 0,
4639 CTDB_CONTROL_DISABLE_SCRIPT, 0, data,
4640 NULL, NULL, &res, &timeout, NULL);
4641 if (ret != 0 || res != 0) {
4642 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for disablescript failed\n"));
4643 return -1;
4646 return 0;
4650 int ctdb_ctrl_set_ban(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, struct ctdb_ban_time *bantime)
4652 int ret;
4653 TDB_DATA data;
4654 int32_t res;
4656 data.dsize = sizeof(*bantime);
4657 data.dptr = (uint8_t *)bantime;
4659 ret = ctdb_control(ctdb, destnode, 0,
4660 CTDB_CONTROL_SET_BAN_STATE, 0, data,
4661 NULL, NULL, &res, &timeout, NULL);
4662 if (ret != 0 || res != 0) {
4663 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set ban state failed\n"));
4664 return -1;
4667 return 0;
4671 int ctdb_ctrl_get_ban(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, TALLOC_CTX *mem_ctx, struct ctdb_ban_time **bantime)
4673 int ret;
4674 TDB_DATA outdata;
4675 int32_t res;
4676 TALLOC_CTX *tmp_ctx = talloc_new(NULL);
4678 ret = ctdb_control(ctdb, destnode, 0,
4679 CTDB_CONTROL_GET_BAN_STATE, 0, tdb_null,
4680 tmp_ctx, &outdata, &res, &timeout, NULL);
4681 if (ret != 0 || res != 0) {
4682 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set ban state failed\n"));
4683 talloc_free(tmp_ctx);
4684 return -1;
4687 *bantime = (struct ctdb_ban_time *)talloc_steal(mem_ctx, outdata.dptr);
4688 talloc_free(tmp_ctx);
4690 return 0;
4694 int ctdb_ctrl_set_db_priority(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, struct ctdb_db_priority *db_prio)
4696 int ret;
4697 int32_t res;
4698 TDB_DATA data;
4699 TALLOC_CTX *tmp_ctx = talloc_new(NULL);
4701 data.dptr = (uint8_t*)db_prio;
4702 data.dsize = sizeof(*db_prio);
4704 ret = ctdb_control(ctdb, destnode, 0,
4705 CTDB_CONTROL_SET_DB_PRIORITY, 0, data,
4706 tmp_ctx, NULL, &res, &timeout, NULL);
4707 if (ret != 0 || res != 0) {
4708 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set_db_priority failed\n"));
4709 talloc_free(tmp_ctx);
4710 return -1;
4713 talloc_free(tmp_ctx);
4715 return 0;
4718 int ctdb_ctrl_get_db_priority(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t db_id, uint32_t *priority)
4720 int ret;
4721 int32_t res;
4722 TDB_DATA data;
4723 TALLOC_CTX *tmp_ctx = talloc_new(NULL);
4725 data.dptr = (uint8_t*)&db_id;
4726 data.dsize = sizeof(db_id);
4728 ret = ctdb_control(ctdb, destnode, 0,
4729 CTDB_CONTROL_GET_DB_PRIORITY, 0, data,
4730 tmp_ctx, NULL, &res, &timeout, NULL);
4731 if (ret != 0 || res < 0) {
4732 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get_db_priority failed\n"));
4733 talloc_free(tmp_ctx);
4734 return -1;
4737 if (priority) {
4738 *priority = res;
4741 talloc_free(tmp_ctx);
4743 return 0;
4746 int ctdb_ctrl_getstathistory(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, TALLOC_CTX *mem_ctx, struct ctdb_statistics_wire **stats)
4748 int ret;
4749 TDB_DATA outdata;
4750 int32_t res;
4752 ret = ctdb_control(ctdb, destnode, 0,
4753 CTDB_CONTROL_GET_STAT_HISTORY, 0, tdb_null,
4754 mem_ctx, &outdata, &res, &timeout, NULL);
4755 if (ret != 0 || res != 0 || outdata.dsize == 0) {
4756 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getstathistory failed ret:%d res:%d\n", ret, res));
4757 return -1;
4760 *stats = (struct ctdb_statistics_wire *)talloc_memdup(mem_ctx, outdata.dptr, outdata.dsize);
4761 talloc_free(outdata.dptr);
4763 return 0;
4766 struct ctdb_ltdb_header *ctdb_header_from_record_handle(struct ctdb_record_handle *h)
4768 if (h == NULL) {
4769 return NULL;
4772 return &h->header;
4776 struct ctdb_client_control_state *
4777 ctdb_ctrl_updaterecord_send(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct timeval timeout, uint32_t destnode, struct ctdb_db_context *ctdb_db, TDB_DATA key, struct ctdb_ltdb_header *header, TDB_DATA data)
4779 struct ctdb_client_control_state *handle;
4780 struct ctdb_marshall_buffer *m;
4781 struct ctdb_rec_data *rec;
4782 TDB_DATA outdata;
4784 m = talloc_zero(mem_ctx, struct ctdb_marshall_buffer);
4785 if (m == NULL) {
4786 DEBUG(DEBUG_ERR, ("Failed to allocate marshall buffer for update record\n"));
4787 return NULL;
4790 m->db_id = ctdb_db->db_id;
4792 rec = ctdb_marshall_record(m, 0, key, header, data);
4793 if (rec == NULL) {
4794 DEBUG(DEBUG_ERR,("Failed to marshall record for update record\n"));
4795 talloc_free(m);
4796 return NULL;
4798 m = talloc_realloc_size(mem_ctx, m, rec->length + offsetof(struct ctdb_marshall_buffer, data));
4799 if (m == NULL) {
4800 DEBUG(DEBUG_CRIT,(__location__ " Failed to expand recdata\n"));
4801 talloc_free(m);
4802 return NULL;
4804 m->count++;
4805 memcpy((uint8_t *)m + offsetof(struct ctdb_marshall_buffer, data), rec, rec->length);
4808 outdata.dptr = (uint8_t *)m;
4809 outdata.dsize = talloc_get_size(m);
4811 handle = ctdb_control_send(ctdb, destnode, 0,
4812 CTDB_CONTROL_UPDATE_RECORD, 0, outdata,
4813 mem_ctx, &timeout, NULL);
4814 talloc_free(m);
4815 return handle;
4818 int ctdb_ctrl_updaterecord_recv(struct ctdb_context *ctdb, struct ctdb_client_control_state *state)
4820 int ret;
4821 int32_t res;
4823 ret = ctdb_control_recv(ctdb, state, state, NULL, &res, NULL);
4824 if ( (ret != 0) || (res != 0) ){
4825 DEBUG(DEBUG_ERR,(__location__ " ctdb_ctrl_update_record_recv failed\n"));
4826 return -1;
4829 return 0;
4833 ctdb_ctrl_updaterecord(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct timeval timeout, uint32_t destnode, struct ctdb_db_context *ctdb_db, TDB_DATA key, struct ctdb_ltdb_header *header, TDB_DATA data)
4835 struct ctdb_client_control_state *state;
4837 state = ctdb_ctrl_updaterecord_send(ctdb, mem_ctx, timeout, destnode, ctdb_db, key, header, data);
4838 return ctdb_ctrl_updaterecord_recv(ctdb, state);
4847 set a database to be readonly
4849 struct ctdb_client_control_state *
4850 ctdb_ctrl_set_db_readonly_send(struct ctdb_context *ctdb, uint32_t destnode, uint32_t dbid)
4852 TDB_DATA data;
4854 data.dptr = (uint8_t *)&dbid;
4855 data.dsize = sizeof(dbid);
4857 return ctdb_control_send(ctdb, destnode, 0,
4858 CTDB_CONTROL_SET_DB_READONLY, 0, data,
4859 ctdb, NULL, NULL);
4862 int ctdb_ctrl_set_db_readonly_recv(struct ctdb_context *ctdb, struct ctdb_client_control_state *state)
4864 int ret;
4865 int32_t res;
4867 ret = ctdb_control_recv(ctdb, state, ctdb, NULL, &res, NULL);
4868 if (ret != 0 || res != 0) {
4869 DEBUG(DEBUG_ERR,(__location__ " ctdb_ctrl_set_db_readonly_recv failed ret:%d res:%d\n", ret, res));
4870 return -1;
4873 return 0;
/*
  Synchronous SET_DB_READONLY: runs the _send and _recv halves back to
  back and returns the result of ctdb_ctrl_set_db_readonly_recv().
 */
int ctdb_ctrl_set_db_readonly(struct ctdb_context *ctdb, uint32_t destnode, uint32_t dbid)
{
	return ctdb_ctrl_set_db_readonly_recv(
		ctdb,
		ctdb_ctrl_set_db_readonly_send(ctdb, destnode, dbid));
}
4885 set a database to be sticky
4887 struct ctdb_client_control_state *
4888 ctdb_ctrl_set_db_sticky_send(struct ctdb_context *ctdb, uint32_t destnode, uint32_t dbid)
4890 TDB_DATA data;
4892 data.dptr = (uint8_t *)&dbid;
4893 data.dsize = sizeof(dbid);
4895 return ctdb_control_send(ctdb, destnode, 0,
4896 CTDB_CONTROL_SET_DB_STICKY, 0, data,
4897 ctdb, NULL, NULL);
4900 int ctdb_ctrl_set_db_sticky_recv(struct ctdb_context *ctdb, struct ctdb_client_control_state *state)
4902 int ret;
4903 int32_t res;
4905 ret = ctdb_control_recv(ctdb, state, ctdb, NULL, &res, NULL);
4906 if (ret != 0 || res != 0) {
4907 DEBUG(DEBUG_ERR,(__location__ " ctdb_ctrl_set_db_sticky_recv failed ret:%d res:%d\n", ret, res));
4908 return -1;
4911 return 0;
/*
  Synchronous SET_DB_STICKY: runs the _send and _recv halves back to
  back and returns the result of ctdb_ctrl_set_db_sticky_recv().
 */
int ctdb_ctrl_set_db_sticky(struct ctdb_context *ctdb, uint32_t destnode, uint32_t dbid)
{
	return ctdb_ctrl_set_db_sticky_recv(
		ctdb,
		ctdb_ctrl_set_db_sticky_send(ctdb, destnode, dbid));
}