smbd: Use %ju/uintmax_t in source3/locking
[Samba.git] / ctdb / server / ctdb_freeze.c
blob42a12850ac3bea7bca8541d99cb6dff5d44b71ba
1 /*
2 ctdb freeze handling
4 Copyright (C) Andrew Tridgell 2007
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, see <http://www.gnu.org/licenses/>.
19 #include "includes.h"
20 #include "tdb.h"
21 #include "system/network.h"
22 #include "system/filesys.h"
23 #include "system/wait.h"
24 #include "../include/ctdb_private.h"
25 #include "lib/util/dlinklist.h"
26 #include "db_wrap.h"
27 #include "../common/rb_tree.h"
/*
  A control request parked on a freeze handle until the freeze lock
  child has obtained the database locks.
 */
struct ctdb_freeze_waiter {
	/* doubly linked list pointers; the list head is the handle's
	   'waiters' member */
	struct ctdb_freeze_waiter *next;
	struct ctdb_freeze_waiter *prev;

	/* daemon context used when the reply is sent */
	struct ctdb_context *ctdb;

	/* the control request that gets answered when the waiter is freed */
	struct ctdb_req_control *c;

	/* database priority the freeze was requested for */
	uint32_t priority;

	/* status sent in the control reply */
	int32_t status;
};
/*
  A handle to a freeze lock child process.  The ctdb context keeps one
  slot per database priority for these handles.
 */
struct ctdb_freeze_handle {
	/* owning daemon context */
	struct ctdb_context *ctdb;

	/* database priority this handle freezes */
	uint32_t priority;

	/* lock request obtained from ctdb_lock_alldb_prio() */
	struct lock_request *lreq;

	/* controls waiting for the freeze to complete */
	struct ctdb_freeze_waiter *waiters;
};
50 destroy a freeze handle
51 */
52 static int ctdb_freeze_handle_destructor(struct ctdb_freeze_handle *h)
54 struct ctdb_context *ctdb = h->ctdb;
55 struct ctdb_db_context *ctdb_db;
57 DEBUG(DEBUG_ERR,("Release freeze handler for prio %u\n", h->priority));
59 /* cancel any pending transactions */
60 if (ctdb->freeze_transaction_started) {
61 for (ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next) {
62 if (ctdb_db->priority != h->priority) {
63 continue;
65 tdb_add_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
66 if (tdb_transaction_cancel(ctdb_db->ltdb->tdb) != 0) {
67 DEBUG(DEBUG_ERR,(__location__ " Failed to cancel transaction for db '%s'\n",
68 ctdb_db->db_name));
70 tdb_remove_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
72 ctdb->freeze_transaction_started = false;
75 ctdb->freeze_mode[h->priority] = CTDB_FREEZE_NONE;
76 ctdb->freeze_handles[h->priority] = NULL;
78 ctdb_lock_free_request_context(h->lreq);
79 return 0;
83 called when the child writes its status to us
85 static void ctdb_freeze_lock_handler(void *private_data, bool locked)
87 struct ctdb_freeze_handle *h = talloc_get_type_abort(private_data,
88 struct ctdb_freeze_handle);
89 struct ctdb_freeze_waiter *w;
91 if (h->ctdb->freeze_mode[h->priority] == CTDB_FREEZE_FROZEN) {
92 DEBUG(DEBUG_INFO,("freeze child died - unfreezing\n"));
93 talloc_free(h);
94 return;
97 if (!locked) {
98 DEBUG(DEBUG_ERR,("Failed to get locks in ctdb_freeze_child\n"));
99 /* we didn't get the locks - destroy the handle */
100 talloc_free(h);
101 return;
104 h->ctdb->freeze_mode[h->priority] = CTDB_FREEZE_FROZEN;
106 /* notify the waiters */
107 if (h != h->ctdb->freeze_handles[h->priority]) {
108 DEBUG(DEBUG_ERR,("lockwait finished but h is not linked\n"));
110 while ((w = h->waiters)) {
111 w->status = 0;
112 DLIST_REMOVE(h->waiters, w);
113 talloc_free(w);
118 destroy a waiter for a freeze mode change
120 static int ctdb_freeze_waiter_destructor(struct ctdb_freeze_waiter *w)
122 ctdb_request_control_reply(w->ctdb, w->c, NULL, w->status, NULL);
123 return 0;
127 start the freeze process for a certain priority
129 void ctdb_start_freeze(struct ctdb_context *ctdb, uint32_t priority)
131 struct ctdb_freeze_handle *h;
133 if ((priority < 1) || (priority > NUM_DB_PRIORITIES)) {
134 DEBUG(DEBUG_ERR,(__location__ " Invalid db priority : %u\n", priority));
135 ctdb_fatal(ctdb, "Internal error");
138 if (ctdb->freeze_mode[priority] == CTDB_FREEZE_FROZEN) {
139 /* we're already frozen */
140 return;
143 DEBUG(DEBUG_ERR, ("Freeze priority %u\n", priority));
145 /* Stop any vacuuming going on: we don't want to wait. */
146 ctdb_stop_vacuuming(ctdb);
148 /* if there isn't a freeze lock child then create one */
149 if (ctdb->freeze_handles[priority] == NULL) {
150 h = talloc_zero(ctdb, struct ctdb_freeze_handle);
151 CTDB_NO_MEMORY_FATAL(ctdb, h);
152 h->ctdb = ctdb;
153 h->priority = priority;
154 talloc_set_destructor(h, ctdb_freeze_handle_destructor);
156 h->lreq = ctdb_lock_alldb_prio(ctdb, priority, false, ctdb_freeze_lock_handler, h);
157 CTDB_NO_MEMORY_FATAL(ctdb, h->lreq);
158 ctdb->freeze_handles[priority] = h;
159 ctdb->freeze_mode[priority] = CTDB_FREEZE_PENDING;
164 freeze the databases
166 int32_t ctdb_control_freeze(struct ctdb_context *ctdb, struct ctdb_req_control *c, bool *async_reply)
168 struct ctdb_freeze_waiter *w;
169 uint32_t priority;
171 priority = (uint32_t)c->srvid;
173 if (priority == 0) {
174 DEBUG(DEBUG_ERR,("Freeze priority 0 requested, remapping to priority 1\n"));
175 priority = 1;
178 if ((priority < 1) || (priority > NUM_DB_PRIORITIES)) {
179 DEBUG(DEBUG_ERR,(__location__ " Invalid db priority : %u\n", priority));
180 return -1;
183 if (ctdb->freeze_mode[priority] == CTDB_FREEZE_FROZEN) {
184 DEBUG(DEBUG_ERR, ("Freeze priority %u\n", priority));
185 /* we're already frozen */
186 return 0;
189 ctdb_start_freeze(ctdb, priority);
191 /* add ourselves to list of waiters */
192 if (ctdb->freeze_handles[priority] == NULL) {
193 DEBUG(DEBUG_ERR,("No freeze lock handle when adding a waiter\n"));
194 return -1;
197 w = talloc(ctdb->freeze_handles[priority], struct ctdb_freeze_waiter);
198 CTDB_NO_MEMORY(ctdb, w);
199 w->ctdb = ctdb;
200 w->c = talloc_steal(w, c);
201 w->priority = priority;
202 w->status = -1;
203 talloc_set_destructor(w, ctdb_freeze_waiter_destructor);
204 DLIST_ADD(ctdb->freeze_handles[priority]->waiters, w);
206 /* we won't reply till later */
207 *async_reply = true;
208 return 0;
213 block until we are frozen, used during daemon startup
215 bool ctdb_blocking_freeze(struct ctdb_context *ctdb)
217 int i;
219 for (i=1; i<=NUM_DB_PRIORITIES; i++) {
220 ctdb_start_freeze(ctdb, i);
222 /* block until frozen */
223 while (ctdb->freeze_mode[i] == CTDB_FREEZE_PENDING) {
224 event_loop_once(ctdb->ev);
228 return true;
232 static void thaw_priority(struct ctdb_context *ctdb, uint32_t priority)
234 DEBUG(DEBUG_ERR,("Thawing priority %u\n", priority));
236 /* cancel any pending transactions */
237 if (ctdb->freeze_transaction_started) {
238 struct ctdb_db_context *ctdb_db;
240 for (ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next) {
241 tdb_add_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
242 if (tdb_transaction_cancel(ctdb_db->ltdb->tdb) != 0) {
243 DEBUG(DEBUG_ERR,(__location__ " Failed to cancel transaction for db '%s'\n",
244 ctdb_db->db_name));
246 tdb_remove_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
249 ctdb->freeze_transaction_started = false;
251 #if 0
252 /* this hack can be used to get a copy of the databases at the end of a recovery */
253 system("mkdir -p /var/ctdb.saved; /usr/bin/rsync --delete -a /var/ctdb/ /var/ctdb.saved/$$ 2>&1 > /dev/null");
254 #endif
256 #if 0
257 /* and this one for local testing */
258 system("mkdir -p test.db.saved; /usr/bin/rsync --delete -a test.db/ test.db.saved/$$ 2>&1 > /dev/null");
259 #endif
261 if (ctdb->freeze_handles[priority] != NULL) {
262 talloc_free(ctdb->freeze_handles[priority]);
263 ctdb->freeze_handles[priority] = NULL;
268 thaw the databases
270 int32_t ctdb_control_thaw(struct ctdb_context *ctdb, uint32_t priority,
271 bool check_recmode)
273 if (priority > NUM_DB_PRIORITIES) {
274 DEBUG(DEBUG_ERR,(__location__ " Invalid db priority : %u\n",
275 priority));
276 return -1;
279 if (check_recmode && ctdb->recovery_mode == CTDB_RECOVERY_ACTIVE) {
280 DEBUG(DEBUG_ERR, ("Failing to thaw databases while "
281 "recovery is active\n"));
282 return -1;
285 if (priority == 0) {
286 int i;
287 for (i=1;i<=NUM_DB_PRIORITIES; i++) {
288 thaw_priority(ctdb, i);
290 } else {
291 thaw_priority(ctdb, priority);
294 ctdb_call_resend_all(ctdb);
295 return 0;
300 start a transaction on all databases - used for recovery
302 int32_t ctdb_control_transaction_start(struct ctdb_context *ctdb, uint32_t id)
304 struct ctdb_db_context *ctdb_db;
305 int i;
307 for (i=1;i<=NUM_DB_PRIORITIES; i++) {
308 if (ctdb->freeze_mode[i] != CTDB_FREEZE_FROZEN) {
309 DEBUG(DEBUG_ERR,(__location__ " Failed transaction_start while not frozen\n"));
310 return -1;
314 for (ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next) {
315 int ret;
317 tdb_add_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
319 if (ctdb->freeze_transaction_started) {
320 if (tdb_transaction_cancel(ctdb_db->ltdb->tdb) != 0) {
321 DEBUG(DEBUG_ERR,(__location__ " Failed to cancel transaction for db '%s'\n",
322 ctdb_db->db_name));
323 /* not a fatal error */
327 ret = tdb_transaction_start(ctdb_db->ltdb->tdb);
329 tdb_remove_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
331 if (ret != 0) {
332 DEBUG(DEBUG_ERR,(__location__ " Failed to start transaction for db '%s'\n",
333 ctdb_db->db_name));
334 return -1;
338 ctdb->freeze_transaction_started = true;
339 ctdb->freeze_transaction_id = id;
341 return 0;
345 cancel a transaction for all databases - used for recovery
347 int32_t ctdb_control_transaction_cancel(struct ctdb_context *ctdb)
349 struct ctdb_db_context *ctdb_db;
351 DEBUG(DEBUG_ERR,(__location__ " recovery transaction cancelled called\n"));
353 for (ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next) {
354 tdb_add_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
356 if (tdb_transaction_cancel(ctdb_db->ltdb->tdb) != 0) {
357 DEBUG(DEBUG_ERR,(__location__ " Failed to cancel transaction for db '%s'\n", ctdb_db->db_name));
358 /* not a fatal error */
361 tdb_remove_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
364 ctdb->freeze_transaction_started = false;
366 return 0;
370 commit transactions on all databases
372 int32_t ctdb_control_transaction_commit(struct ctdb_context *ctdb, uint32_t id)
374 struct ctdb_db_context *ctdb_db;
375 int i;
376 int healthy_nodes = 0;
378 for (i=1;i<=NUM_DB_PRIORITIES; i++) {
379 if (ctdb->freeze_mode[i] != CTDB_FREEZE_FROZEN) {
380 DEBUG(DEBUG_ERR,(__location__ " Failed transaction_start while not frozen\n"));
381 return -1;
385 if (!ctdb->freeze_transaction_started) {
386 DEBUG(DEBUG_ERR,(__location__ " transaction not started\n"));
387 return -1;
390 if (id != ctdb->freeze_transaction_id) {
391 DEBUG(DEBUG_ERR,(__location__ " incorrect transaction id 0x%x in commit\n", id));
392 return -1;
395 DEBUG(DEBUG_DEBUG,(__location__ " num_nodes[%d]\n", ctdb->num_nodes));
396 for (i=0; i < ctdb->num_nodes; i++) {
397 DEBUG(DEBUG_DEBUG,(__location__ " node[%d].flags[0x%X]\n",
398 i, ctdb->nodes[i]->flags));
399 if (ctdb->nodes[i]->flags == 0) {
400 healthy_nodes++;
403 DEBUG(DEBUG_INFO,(__location__ " healthy_nodes[%d]\n", healthy_nodes));
405 for (ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next) {
406 int ret;
408 tdb_add_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
409 ret = tdb_transaction_commit(ctdb_db->ltdb->tdb);
410 if (ret != 0) {
411 DEBUG(DEBUG_ERR,(__location__ " Failed to commit transaction for db '%s'. Cancel all transactions and resetting transaction_started to false.\n",
412 ctdb_db->db_name));
413 goto fail;
415 tdb_remove_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
417 ret = ctdb_update_persistent_health(ctdb, ctdb_db, NULL, healthy_nodes);
418 if (ret != 0) {
419 DEBUG(DEBUG_CRIT,(__location__ " Failed to update persistent health for db '%s'. "
420 "Cancel all remaining transactions and resetting transaction_started to false.\n",
421 ctdb_db->db_name));
422 goto fail;
426 ctdb->freeze_transaction_started = false;
427 ctdb->freeze_transaction_id = 0;
429 return 0;
431 fail:
432 /* cancel any pending transactions */
433 for (ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next) {
434 tdb_add_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
435 if (tdb_transaction_cancel(ctdb_db->ltdb->tdb) != 0) {
436 DEBUG(DEBUG_ERR,(__location__ " Failed to cancel transaction for db '%s'\n",
437 ctdb_db->db_name));
439 tdb_remove_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
441 ctdb->freeze_transaction_started = false;
443 return -1;
447 wipe a database - only possible when in a frozen transaction
449 int32_t ctdb_control_wipe_database(struct ctdb_context *ctdb, TDB_DATA indata)
451 struct ctdb_control_wipe_database w = *(struct ctdb_control_wipe_database *)indata.dptr;
452 struct ctdb_db_context *ctdb_db;
454 ctdb_db = find_ctdb_db(ctdb, w.db_id);
455 if (!ctdb_db) {
456 DEBUG(DEBUG_ERR,(__location__ " Unknown db 0x%x\n", w.db_id));
457 return -1;
460 if (ctdb->freeze_mode[ctdb_db->priority] != CTDB_FREEZE_FROZEN) {
461 DEBUG(DEBUG_ERR,(__location__ " Failed transaction_start while not frozen\n"));
462 return -1;
465 if (!ctdb->freeze_transaction_started) {
466 DEBUG(DEBUG_ERR,(__location__ " transaction not started\n"));
467 return -1;
470 if (w.transaction_id != ctdb->freeze_transaction_id) {
471 DEBUG(DEBUG_ERR,(__location__ " incorrect transaction id 0x%x in commit\n", w.transaction_id));
472 return -1;
475 if (tdb_wipe_all(ctdb_db->ltdb->tdb) != 0) {
476 DEBUG(DEBUG_ERR,(__location__ " Failed to wipe database for db '%s'\n",
477 ctdb_db->db_name));
478 return -1;
481 if (!ctdb_db->persistent) {
482 talloc_free(ctdb_db->delete_queue);
483 ctdb_db->delete_queue = trbt_create(ctdb_db, 0);
484 if (ctdb_db->delete_queue == NULL) {
485 DEBUG(DEBUG_ERR, (__location__ " Failed to re-create "
486 "the vacuum tree.\n"));
487 return -1;
491 return 0;