4 Copyright (C) Andrew Tridgell 2007
5 Copyright (C) Ronnie Sahlberg 2007
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, see <http://www.gnu.org/licenses/>.
22 #include "lib/tevent/tevent.h"
23 #include "system/filesys.h"
24 #include "system/wait.h"
26 #include "lib/tdb/include/tdb.h"
27 #include "../include/ctdb_private.h"
29 struct ctdb_persistent_state
{
30 struct ctdb_context
*ctdb
;
31 struct ctdb_db_context
*ctdb_db
; /* used by trans3_commit */
32 struct ctdb_client
*client
; /* used by trans3_commit */
33 struct ctdb_req_control
*c
;
37 uint32_t num_failed
, num_sent
;
41 1) all nodes fail, and all nodes reply
42 2) some nodes fail, all nodes reply
48 called when a node has acknowledged a ctdb_control_update_record call
50 static void ctdb_persistent_callback(struct ctdb_context
*ctdb
,
51 int32_t status
, TDB_DATA data
,
55 struct ctdb_persistent_state
*state
= talloc_get_type(private_data
,
56 struct ctdb_persistent_state
);
57 enum ctdb_trans2_commit_error etype
;
59 if (ctdb
->recovery_mode
!= CTDB_RECOVERY_NORMAL
) {
60 DEBUG(DEBUG_INFO
, ("ctdb_persistent_callback: ignoring reply "
61 "during recovery\n"));
66 DEBUG(DEBUG_ERR
,("ctdb_persistent_callback failed with status %d (%s)\n",
67 status
, errormsg
?errormsg
:"no error message given"));
68 state
->status
= status
;
69 state
->errormsg
= errormsg
;
73 * If a node failed to complete the update_record control,
74 * then either a recovery is already running or something
75 * bad is going on. So trigger a recovery and let the
76 * recovery finish the transaction, sending back the reply
77 * for the trans3_commit control to the client.
79 ctdb
->recovery_mode
= CTDB_RECOVERY_ACTIVE
;
85 if (state
->num_pending
!= 0) {
89 if (state
->num_failed
== state
->num_sent
) {
90 etype
= CTDB_TRANS2_COMMIT_ALLFAIL
;
91 } else if (state
->num_failed
!= 0) {
92 etype
= CTDB_TRANS2_COMMIT_SOMEFAIL
;
94 etype
= CTDB_TRANS2_COMMIT_SUCCESS
;
97 ctdb_request_control_reply(state
->ctdb
, state
->c
, NULL
, etype
, state
->errormsg
);
102 called if persistent store times out
104 static void ctdb_persistent_store_timeout(struct event_context
*ev
, struct timed_event
*te
,
105 struct timeval t
, void *private_data
)
107 struct ctdb_persistent_state
*state
= talloc_get_type(private_data
, struct ctdb_persistent_state
);
109 if (state
->ctdb
->recovery_mode
!= CTDB_RECOVERY_NORMAL
) {
110 DEBUG(DEBUG_INFO
, ("ctdb_persistent_store_timeout: ignoring "
111 "timeout during recovery\n"));
115 ctdb_request_control_reply(state
->ctdb
, state
->c
, NULL
, CTDB_TRANS2_COMMIT_TIMEOUT
,
116 "timeout in ctdb_persistent_state");
122 * Finish pending trans3 commit controls, i.e. send
123 * reply to the client. This is called by the end-recovery
124 * control to fix the situation when a recovery interrupts
125 * the usual progress of a transaction.
127 void ctdb_persistent_finish_trans3_commits(struct ctdb_context
*ctdb
)
129 struct ctdb_db_context
*ctdb_db
;
131 if (ctdb
->recovery_mode
!= CTDB_RECOVERY_NORMAL
) {
132 DEBUG(DEBUG_INFO
, ("ctdb_persistent_store_timeout: ignoring "
133 "timeout during recovery\n"));
137 for (ctdb_db
= ctdb
->db_list
; ctdb_db
; ctdb_db
= ctdb_db
->next
) {
138 struct ctdb_persistent_state
*state
;
140 if (ctdb_db
->persistent_state
== NULL
) {
144 state
= ctdb_db
->persistent_state
;
146 ctdb_request_control_reply(ctdb
, state
->c
, NULL
,
147 CTDB_TRANS2_COMMIT_SOMEFAIL
,
148 "trans3 commit ended by recovery");
150 /* The destructor sets ctdb_db->persistent_state to NULL. */
156 store a set of persistent records - called from a ctdb client when it has updated
157 some records in a persistent database. The client will have the record
158 locked for the duration of this call. The client is the dmaster when
161 int32_t ctdb_control_trans2_commit(struct ctdb_context
*ctdb
,
162 struct ctdb_req_control
*c
,
163 TDB_DATA recdata
, bool *async_reply
)
165 struct ctdb_client
*client
= ctdb_reqid_find(ctdb
, c
->client_id
, struct ctdb_client
);
166 struct ctdb_persistent_state
*state
;
168 struct ctdb_marshall_buffer
*m
= (struct ctdb_marshall_buffer
*)recdata
.dptr
;
169 struct ctdb_db_context
*ctdb_db
;
171 ctdb_db
= find_ctdb_db(ctdb
, m
->db_id
);
172 if (ctdb_db
== NULL
) {
173 DEBUG(DEBUG_ERR
,(__location__
" ctdb_control_trans2_commit: "
174 "Unknown database db_id[0x%08x]\n", m
->db_id
));
178 if (client
== NULL
) {
179 DEBUG(DEBUG_ERR
,(__location__
" can not match persistent_store to a client. Returning error\n"));
183 if (ctdb_db
->unhealthy_reason
) {
184 DEBUG(DEBUG_ERR
,("db(%s) unhealty in ctdb_control_trans2_commit: %s\n",
185 ctdb_db
->db_name
, ctdb_db
->unhealthy_reason
));
189 /* handling num_persistent_updates is a bit strange -
191 1) very old clients, which never called CTDB_CONTROL_START_PERSISTENT_UPDATE
192 They don't expect num_persistent_updates to be used at all
194 2) less old clients, which use CTDB_CONTROL_START_PERSISTENT_UPDATE, and expect
195 this commit to then decrement it
197 3) new clients which use TRANS2 commit functions, and
198 expect this function to increment the counter, and
199 then have it decremented in ctdb_control_trans2_error
200 or ctdb_control_trans2_finished
203 case CTDB_CONTROL_PERSISTENT_STORE
:
204 if (ctdb_db
->transaction_active
) {
205 DEBUG(DEBUG_ERR
, (__location__
" trans2_commit: a "
206 "transaction is active on database "
207 "db_id[0x%08x] - refusing persistent "
208 " store for client id[0x%08x]\n",
209 ctdb_db
->db_id
, client
->client_id
));
212 if (client
->num_persistent_updates
> 0) {
213 client
->num_persistent_updates
--;
216 case CTDB_CONTROL_TRANS2_COMMIT
:
217 if (ctdb_db
->transaction_active
) {
218 DEBUG(DEBUG_ERR
,(__location__
" trans2_commit: there is"
219 " already a transaction commit "
220 "active on db_id[0x%08x] - forbidding "
221 "client_id[0x%08x] to commit\n",
222 ctdb_db
->db_id
, client
->client_id
));
225 if (client
->db_id
!= 0) {
226 DEBUG(DEBUG_ERR
,(__location__
" ERROR: trans2_commit: "
227 "client-db_id[0x%08x] != 0 "
228 "(client_id[0x%08x])\n",
229 client
->db_id
, client
->client_id
));
232 client
->num_persistent_updates
++;
233 ctdb_db
->transaction_active
= true;
234 client
->db_id
= m
->db_id
;
235 DEBUG(DEBUG_DEBUG
, (__location__
" client id[0x%08x] started to"
236 " commit transaction on db id[0x%08x]\n",
237 client
->client_id
, client
->db_id
));
239 case CTDB_CONTROL_TRANS2_COMMIT_RETRY
:
240 /* already updated from the first commit */
241 if (client
->db_id
!= m
->db_id
) {
242 DEBUG(DEBUG_ERR
,(__location__
" ERROR: trans2_commit "
243 "retry: client-db_id[0x%08x] != "
244 "db_id[0x%08x] (client_id[0x%08x])\n",
246 m
->db_id
, client
->client_id
));
249 DEBUG(DEBUG_DEBUG
, (__location__
" client id[0x%08x] started "
250 "transaction commit retry on "
252 client
->client_id
, client
->db_id
));
256 if (ctdb
->recovery_mode
!= CTDB_RECOVERY_NORMAL
) {
257 DEBUG(DEBUG_INFO
,("rejecting ctdb_control_trans2_commit when recovery active\n"));
261 state
= talloc_zero(ctdb
, struct ctdb_persistent_state
);
262 CTDB_NO_MEMORY(ctdb
, state
);
267 for (i
=0;i
<ctdb
->vnn_map
->size
;i
++) {
268 struct ctdb_node
*node
= ctdb
->nodes
[ctdb
->vnn_map
->map
[i
]];
271 /* only send to active nodes */
272 if (node
->flags
& NODE_FLAGS_INACTIVE
) {
276 /* don't send to ourselves */
277 if (node
->pnn
== ctdb
->pnn
) {
281 ret
= ctdb_daemon_send_control(ctdb
, node
->pnn
, 0, CTDB_CONTROL_UPDATE_RECORD
,
282 c
->client_id
, 0, recdata
,
283 ctdb_persistent_callback
, state
);
285 DEBUG(DEBUG_ERR
,("Unable to send CTDB_CONTROL_UPDATE_RECORD to pnn %u\n", node
->pnn
));
290 state
->num_pending
++;
294 if (state
->num_pending
== 0) {
299 /* we need to wait for the replies */
302 /* need to keep the control structure around */
303 talloc_steal(state
, c
);
305 /* but we won't wait forever */
306 event_add_timed(ctdb
->ev
, state
,
307 timeval_current_ofs(ctdb
->tunable
.control_timeout
, 0),
308 ctdb_persistent_store_timeout
, state
);
313 static int ctdb_persistent_state_destructor(struct ctdb_persistent_state
*state
)
315 if (state
->client
!= NULL
) {
316 state
->client
->db_id
= 0;
319 if (state
->ctdb_db
!= NULL
) {
320 state
->ctdb_db
->persistent_state
= NULL
;
327 * Store a set of persistent records.
328 * This is used to roll out a transaction to all nodes.
330 int32_t ctdb_control_trans3_commit(struct ctdb_context
*ctdb
,
331 struct ctdb_req_control
*c
,
332 TDB_DATA recdata
, bool *async_reply
)
334 struct ctdb_client
*client
;
335 struct ctdb_persistent_state
*state
;
337 struct ctdb_marshall_buffer
*m
= (struct ctdb_marshall_buffer
*)recdata
.dptr
;
338 struct ctdb_db_context
*ctdb_db
;
340 if (ctdb
->recovery_mode
!= CTDB_RECOVERY_NORMAL
) {
341 DEBUG(DEBUG_INFO
,("rejecting ctdb_control_trans3_commit when recovery active\n"));
345 client
= ctdb_reqid_find(ctdb
, c
->client_id
, struct ctdb_client
);
346 if (client
== NULL
) {
347 DEBUG(DEBUG_ERR
,(__location__
" can not match persistent_store "
348 "to a client. Returning error\n"));
352 if (client
->db_id
!= 0) {
353 DEBUG(DEBUG_ERR
,(__location__
" ERROR: trans3_commit: "
354 "client-db_id[0x%08x] != 0 "
355 "(client_id[0x%08x]): trans3_commit active?\n",
356 client
->db_id
, client
->client_id
));
360 ctdb_db
= find_ctdb_db(ctdb
, m
->db_id
);
361 if (ctdb_db
== NULL
) {
362 DEBUG(DEBUG_ERR
,(__location__
" ctdb_control_trans3_commit: "
363 "Unknown database db_id[0x%08x]\n", m
->db_id
));
367 if (ctdb_db
->persistent_state
!= NULL
) {
368 DEBUG(DEBUG_ERR
, (__location__
" Error: "
369 "ctdb_control_trans3_commit "
370 "called while a transaction commit is "
371 "active. db_id[0x%08x]\n", m
->db_id
));
375 ctdb_db
->persistent_state
= talloc_zero(ctdb_db
,
376 struct ctdb_persistent_state
);
377 CTDB_NO_MEMORY(ctdb
, ctdb_db
->persistent_state
);
379 client
->db_id
= m
->db_id
;
381 state
= ctdb_db
->persistent_state
;
383 state
->ctdb_db
= ctdb_db
;
385 state
->client
= client
;
387 talloc_set_destructor(state
, ctdb_persistent_state_destructor
);
389 for (i
= 0; i
< ctdb
->vnn_map
->size
; i
++) {
390 struct ctdb_node
*node
= ctdb
->nodes
[ctdb
->vnn_map
->map
[i
]];
393 /* only send to active nodes */
394 if (node
->flags
& NODE_FLAGS_INACTIVE
) {
398 ret
= ctdb_daemon_send_control(ctdb
, node
->pnn
, 0,
399 CTDB_CONTROL_UPDATE_RECORD
,
400 c
->client_id
, 0, recdata
,
401 ctdb_persistent_callback
,
404 DEBUG(DEBUG_ERR
,("Unable to send "
405 "CTDB_CONTROL_UPDATE_RECORD "
406 "to pnn %u\n", node
->pnn
));
411 state
->num_pending
++;
415 if (state
->num_pending
== 0) {
420 /* we need to wait for the replies */
423 /* need to keep the control structure around */
424 talloc_steal(state
, c
);
426 /* but we won't wait forever */
427 event_add_timed(ctdb
->ev
, state
,
428 timeval_current_ofs(ctdb
->tunable
.control_timeout
, 0),
429 ctdb_persistent_store_timeout
, state
);
436 called when a client has finished a local commit in a transaction to
437 a persistent database
439 int32_t ctdb_control_trans2_finished(struct ctdb_context
*ctdb
,
440 struct ctdb_req_control
*c
)
442 struct ctdb_client
*client
= ctdb_reqid_find(ctdb
, c
->client_id
, struct ctdb_client
);
443 struct ctdb_db_context
*ctdb_db
;
445 ctdb_db
= find_ctdb_db(ctdb
, client
->db_id
);
446 if (ctdb_db
== NULL
) {
447 DEBUG(DEBUG_ERR
,(__location__
" ctdb_control_trans2_finish "
448 "Unknown database 0x%08x\n", client
->db_id
));
451 if (!ctdb_db
->transaction_active
) {
452 DEBUG(DEBUG_ERR
,(__location__
" ctdb_control_trans2_finish: "
453 "Database 0x%08x has no transaction commit "
454 "started\n", client
->db_id
));
458 ctdb_db
->transaction_active
= false;
461 if (client
->num_persistent_updates
== 0) {
462 DEBUG(DEBUG_ERR
, (__location__
" ERROR: num_persistent_updates == 0\n"));
463 DEBUG(DEBUG_ERR
,(__location__
" Forcing recovery\n"));
464 client
->ctdb
->recovery_mode
= CTDB_RECOVERY_ACTIVE
;
467 client
->num_persistent_updates
--;
469 DEBUG(DEBUG_DEBUG
, (__location__
" client id[0x%08x] finished "
470 "transaction commit db_id[0x%08x]\n",
471 client
->client_id
, ctdb_db
->db_id
));
477 called when a client gets an error committing its database
478 during a transaction commit
480 int32_t ctdb_control_trans2_error(struct ctdb_context
*ctdb
,
481 struct ctdb_req_control
*c
)
483 struct ctdb_client
*client
= ctdb_reqid_find(ctdb
, c
->client_id
, struct ctdb_client
);
484 struct ctdb_db_context
*ctdb_db
;
486 ctdb_db
= find_ctdb_db(ctdb
, client
->db_id
);
487 if (ctdb_db
== NULL
) {
488 DEBUG(DEBUG_ERR
,(__location__
" ctdb_control_trans2_error: "
489 "Unknown database 0x%08x\n", client
->db_id
));
492 if (!ctdb_db
->transaction_active
) {
493 DEBUG(DEBUG_ERR
,(__location__
" ctdb_control_trans2_error: "
494 "Database 0x%08x has no transaction commit "
495 "started\n", client
->db_id
));
499 ctdb_db
->transaction_active
= false;
502 if (client
->num_persistent_updates
== 0) {
503 DEBUG(DEBUG_ERR
, (__location__
" ERROR: num_persistent_updates == 0\n"));
505 client
->num_persistent_updates
--;
508 DEBUG(DEBUG_ERR
,(__location__
" An error occurred during transaction on"
509 " db_id[0x%08x] - forcing recovery\n",
511 client
->ctdb
->recovery_mode
= CTDB_RECOVERY_ACTIVE
;
517 * Tell whether a transaction is active on this node on the given DB.
519 int32_t ctdb_control_trans2_active(struct ctdb_context
*ctdb
,
520 struct ctdb_req_control
*c
,
523 struct ctdb_db_context
*ctdb_db
;
524 struct ctdb_client
*client
= ctdb_reqid_find(ctdb
, c
->client_id
, struct ctdb_client
);
526 ctdb_db
= find_ctdb_db(ctdb
, db_id
);
528 DEBUG(DEBUG_ERR
,(__location__
" Unknown db 0x%08x\n", db_id
));
532 if (client
->db_id
== db_id
) {
536 if (ctdb_db
->transaction_active
) {
544 backwards compatibility:
546 start a persistent store operation, passing both the key, header and
547 data to the daemon. If the client disconnects before it has issued
548 a persistent_update call to the daemon we trigger a full recovery
549 to ensure the databases are brought back in sync.
550 for now we ignore the recdata that the client has passed to us.
552 int32_t ctdb_control_start_persistent_update(struct ctdb_context
*ctdb
,
553 struct ctdb_req_control
*c
,
556 struct ctdb_client
*client
= ctdb_reqid_find(ctdb
, c
->client_id
, struct ctdb_client
);
558 if (client
== NULL
) {
559 DEBUG(DEBUG_ERR
,(__location__
" can not match start_persistent_update to a client. Returning error\n"));
563 client
->num_persistent_updates
++;
569 backwards compatibility:
571 called to tell ctdbd that it is no longer doing a persistent update
573 int32_t ctdb_control_cancel_persistent_update(struct ctdb_context
*ctdb
,
574 struct ctdb_req_control
*c
,
577 struct ctdb_client
*client
= ctdb_reqid_find(ctdb
, c
->client_id
, struct ctdb_client
);
579 if (client
== NULL
) {
580 DEBUG(DEBUG_ERR
,(__location__
" can not match cancel_persistent_update to a client. Returning error\n"));
584 if (client
->num_persistent_updates
> 0) {
585 client
->num_persistent_updates
--;
593 backwards compatibility:
595 single record variant of ctdb_control_trans2_commit for older clients
597 int32_t ctdb_control_persistent_store(struct ctdb_context
*ctdb
,
598 struct ctdb_req_control
*c
,
599 TDB_DATA recdata
, bool *async_reply
)
601 struct ctdb_marshall_buffer
*m
;
602 struct ctdb_rec_data
*rec
= (struct ctdb_rec_data
*)recdata
.dptr
;
605 if (recdata
.dsize
!= offsetof(struct ctdb_rec_data
, data
) +
606 rec
->keylen
+ rec
->datalen
) {
607 DEBUG(DEBUG_ERR
, (__location__
" Bad data size in recdata\n"));
611 key
.dptr
= &rec
->data
[0];
612 key
.dsize
= rec
->keylen
;
613 data
.dptr
= &rec
->data
[rec
->keylen
];
614 data
.dsize
= rec
->datalen
;
616 m
= ctdb_marshall_add(c
, NULL
, rec
->reqid
, rec
->reqid
, key
, NULL
, data
);
617 CTDB_NO_MEMORY(ctdb
, m
);
619 return ctdb_control_trans2_commit(ctdb
, c
, ctdb_marshall_finish(m
), async_reply
);
622 static int32_t ctdb_get_db_seqnum(struct ctdb_context
*ctdb
,
627 struct ctdb_db_context
*ctdb_db
;
628 const char *keyname
= CTDB_DB_SEQNUM_KEY
;
631 TALLOC_CTX
*mem_ctx
= talloc_new(ctdb
);
632 struct ctdb_ltdb_header header
;
634 ctdb_db
= find_ctdb_db(ctdb
, db_id
);
636 DEBUG(DEBUG_ERR
,(__location__
" Unknown db 0x%08x\n", db_id
));
641 key
.dptr
= (uint8_t *)discard_const(keyname
);
642 key
.dsize
= strlen(keyname
) + 1;
644 ret
= (int32_t)ctdb_ltdb_fetch(ctdb_db
, key
, &header
, mem_ctx
, &data
);
649 if (data
.dsize
!= sizeof(uint64_t)) {
654 *seqnum
= *(uint64_t *)data
.dptr
;
657 talloc_free(mem_ctx
);
662 * Get the sequence number of a persistent database.
664 int32_t ctdb_control_get_db_seqnum(struct ctdb_context
*ctdb
,
672 db_id
= *(uint32_t *)indata
.dptr
;
673 ret
= ctdb_get_db_seqnum(ctdb
, db_id
, &seqnum
);
678 outdata
->dsize
= sizeof(uint64_t);
679 outdata
->dptr
= (uint8_t *)talloc_zero(outdata
, uint64_t);
680 if (outdata
->dptr
== NULL
) {
685 *(outdata
->dptr
) = seqnum
;