s3:smbd: rename has_ctdb_public_ip to has_cluster_movable_ip
[Samba.git] / ctdb / server / ctdb_lock.c
blob478447d76f80b21d7b26c68ebcbc9aaf26adbb1e
1 /*
2 ctdb lock handling
3 provide API to do non-blocking locks for single or all databases
5 Copyright (C) Amitay Isaacs 2012
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, see <http://www.gnu.org/licenses/>.
20 #include "replace.h"
21 #include "system/filesys.h"
22 #include "system/network.h"
24 #include <talloc.h>
25 #include <tevent.h>
27 #include "lib/tdb_wrap/tdb_wrap.h"
28 #include "lib/util/dlinklist.h"
29 #include "lib/util/debug.h"
30 #include "lib/util/samba_util.h"
31 #include "lib/util/sys_rw.h"
33 #include "ctdb_private.h"
35 #include "common/common.h"
36 #include "common/logging.h"
/*
 * Non-blocking Locking API
 *
 * 1. Create a child process to do blocking locks.
 * 2. Once the locks are obtained, signal parent process via fd.
 * 3. Invoke registered callback routine with locking status.
 * 4. If the child process cannot get locks within certain time,
 *    execute an external script to debug.
 *
 * ctdb_lock_record()      - get a lock on a record
 * ctdb_lock_db()          - get a lock on a DB
 *
 *  auto_mark              - whether to mark/unmark DBs in before/after callback
 *                           = false is used for freezing databases for
 *                             recovery since the recovery cannot start till
 *                             databases are locked on all the nodes.
 *                           = true is used for record locks.
 */
/* Kind of lock a request asks for. */
enum lock_type {
	LOCK_RECORD,	/* lock on a single record, identified by its key */
	LOCK_DB,	/* lock on a whole database */
};

/*
 * Printable name for each lock type, indexed by enum lock_type.
 *
 * Designated initializers keep every string attached to its enum value even
 * if the enum is reordered or extended later.
 */
static const char * const lock_type_str[] = {
	[LOCK_RECORD] = "lock_record",
	[LOCK_DB] = "lock_db",
};
67 struct lock_request;
69 /* lock_context is the common part for a lock request */
70 struct lock_context {
71 struct lock_context *next, *prev;
72 enum lock_type type;
73 struct ctdb_context *ctdb;
74 struct ctdb_db_context *ctdb_db;
75 TDB_DATA key;
76 uint32_t priority;
77 bool auto_mark;
78 struct lock_request *request;
79 pid_t child;
80 int fd[2];
81 struct tevent_fd *tfd;
82 struct tevent_timer *ttimer;
83 struct timeval start_time;
84 uint32_t key_hash;
85 bool can_schedule;
/*
 * lock_request is the client specific part for a lock request.
 *
 * It is the handle returned to the caller of ctdb_lock_record() and
 * ctdb_lock_db().
 */
struct lock_request {
	/* owning lock context; NULL once the two have been detached */
	struct lock_context *lctx;
	/* invoked with private_data and the lock outcome (true = locked) */
	void (*callback)(void *, bool);
	/* opaque pointer handed back to callback */
	void *private_data;
};
96 int ctdb_db_iterator(struct ctdb_context *ctdb, ctdb_db_handler_t handler,
97 void *private_data)
99 struct ctdb_db_context *ctdb_db;
100 int ret;
102 for (ctdb_db = ctdb->db_list; ctdb_db; ctdb_db = ctdb_db->next) {
103 ret = handler(ctdb_db, private_data);
104 if (ret != 0) {
105 return -1;
109 return 0;
113 * lock all databases - mark only
115 static int db_lock_mark_handler(struct ctdb_db_context *ctdb_db,
116 void *private_data)
118 int tdb_transaction_write_lock_mark(struct tdb_context *);
120 DEBUG(DEBUG_INFO, ("marking locked database %s\n", ctdb_db->db_name));
122 if (tdb_transaction_write_lock_mark(ctdb_db->ltdb->tdb) != 0) {
123 DEBUG(DEBUG_ERR, ("Failed to mark (transaction lock) database %s\n",
124 ctdb_db->db_name));
125 return -1;
128 if (tdb_lockall_mark(ctdb_db->ltdb->tdb) != 0) {
129 DEBUG(DEBUG_ERR, ("Failed to mark (all lock) database %s\n",
130 ctdb_db->db_name));
131 return -1;
134 return 0;
137 int ctdb_lockdb_mark(struct ctdb_db_context *ctdb_db)
139 if (!ctdb_db_frozen(ctdb_db)) {
140 DEBUG(DEBUG_ERR,
141 ("Attempt to mark database locked when not frozen\n"));
142 return -1;
145 return db_lock_mark_handler(ctdb_db, NULL);
149 * lock all databases - unmark only
151 static int db_lock_unmark_handler(struct ctdb_db_context *ctdb_db,
152 void *private_data)
154 int tdb_transaction_write_lock_unmark(struct tdb_context *);
156 DEBUG(DEBUG_INFO, ("unmarking locked database %s\n", ctdb_db->db_name));
158 if (tdb_transaction_write_lock_unmark(ctdb_db->ltdb->tdb) != 0) {
159 DEBUG(DEBUG_ERR, ("Failed to unmark (transaction lock) database %s\n",
160 ctdb_db->db_name));
161 return -1;
164 if (tdb_lockall_unmark(ctdb_db->ltdb->tdb) != 0) {
165 DEBUG(DEBUG_ERR, ("Failed to unmark (all lock) database %s\n",
166 ctdb_db->db_name));
167 return -1;
170 return 0;
173 int ctdb_lockdb_unmark(struct ctdb_db_context *ctdb_db)
175 if (!ctdb_db_frozen(ctdb_db)) {
176 DEBUG(DEBUG_ERR,
177 ("Attempt to unmark database locked when not frozen\n"));
178 return -1;
181 return db_lock_unmark_handler(ctdb_db, NULL);
184 static void ctdb_lock_schedule(struct ctdb_context *ctdb);
187 * Destructor to kill the child locking process
189 static int ctdb_lock_context_destructor(struct lock_context *lock_ctx)
191 if (lock_ctx->request) {
192 lock_ctx->request->lctx = NULL;
194 if (lock_ctx->child > 0) {
195 ctdb_kill(lock_ctx->ctdb, lock_ctx->child, SIGTERM);
196 if (lock_ctx->type == LOCK_RECORD) {
197 DLIST_REMOVE(lock_ctx->ctdb_db->lock_current, lock_ctx);
198 } else {
199 DLIST_REMOVE(lock_ctx->ctdb->lock_current, lock_ctx);
201 if (lock_ctx->ctdb_db->lock_num_current == 0) {
202 ctdb_fatal(NULL, "Lock count is 0 before decrement\n");
204 lock_ctx->ctdb_db->lock_num_current--;
205 CTDB_DECREMENT_STAT(lock_ctx->ctdb, locks.num_current);
206 CTDB_DECREMENT_DB_STAT(lock_ctx->ctdb_db, locks.num_current);
207 } else {
208 if (lock_ctx->type == LOCK_RECORD) {
209 DLIST_REMOVE(lock_ctx->ctdb_db->lock_pending, lock_ctx);
210 } else {
211 DLIST_REMOVE(lock_ctx->ctdb->lock_pending, lock_ctx);
213 CTDB_DECREMENT_STAT(lock_ctx->ctdb, locks.num_pending);
214 CTDB_DECREMENT_DB_STAT(lock_ctx->ctdb_db, locks.num_pending);
217 ctdb_lock_schedule(lock_ctx->ctdb);
219 return 0;
224 * Destructor to remove lock request
226 static int ctdb_lock_request_destructor(struct lock_request *lock_request)
228 if (lock_request->lctx == NULL) {
229 return 0;
232 lock_request->lctx->request = NULL;
233 TALLOC_FREE(lock_request->lctx);
235 return 0;
239 * Process all the callbacks waiting for lock
241 * If lock has failed, callback is executed with locked=false
243 static void process_callbacks(struct lock_context *lock_ctx, bool locked)
245 struct lock_request *request;
246 bool auto_mark = lock_ctx->auto_mark;
248 if (auto_mark && locked) {
249 switch (lock_ctx->type) {
250 case LOCK_RECORD:
251 tdb_chainlock_mark(lock_ctx->ctdb_db->ltdb->tdb, lock_ctx->key);
252 break;
254 case LOCK_DB:
255 (void)ctdb_lockdb_mark(lock_ctx->ctdb_db);
256 break;
260 request = lock_ctx->request;
261 if (auto_mark) {
262 /* Since request may be freed in the callback, unset the lock
263 * context, so request destructor will not free lock context.
265 request->lctx = NULL;
268 /* Since request may be freed in the callback, unset the request */
269 lock_ctx->request = NULL;
271 request->callback(request->private_data, locked);
273 if (!auto_mark) {
274 return;
277 if (locked) {
278 switch (lock_ctx->type) {
279 case LOCK_RECORD:
280 tdb_chainlock_unmark(lock_ctx->ctdb_db->ltdb->tdb, lock_ctx->key);
281 break;
283 case LOCK_DB:
284 ctdb_lockdb_unmark(lock_ctx->ctdb_db);
285 break;
289 talloc_free(lock_ctx);
/*
 * Map a lock latency to a statistics bucket index.
 *
 * t is the elapsed time in seconds. Bucket i is the first one whose
 * exclusive upper bound is greater than t:
 *
 *   0: < 1ms   1: < 10ms  2: < 100ms  3: < 1s   4: < 2s   5: < 4s
 *   6: < 8s    7: < 16s   8: < 32s    9: < 64s  10: everything else
 *
 * Returns the bucket index, in the range 0..10.
 */
static int lock_bucket_id(double t)
{
	double ms = 1.e-3, s = 1;
	/* exclusive upper bound of each bucket, in seconds */
	const double upper[] = {
		1*ms, 10*ms, 100*ms,
		1*s, 2*s, 4*s, 8*s, 16*s, 32*s, 64*s,
	};
	const int overflow_id = (int)(sizeof(upper) / sizeof(upper[0]));
	int id;

	for (id = 0; id < overflow_id; id++) {
		if (t < upper[id]) {
			return id;
		}
	}

	/* at or beyond the last bound */
	return overflow_id;
}
326 * Callback routine when the required locks are obtained.
327 * Called from parent context
329 static void ctdb_lock_handler(struct tevent_context *ev,
330 struct tevent_fd *tfd,
331 uint16_t flags,
332 void *private_data)
334 struct lock_context *lock_ctx;
335 char c;
336 bool locked;
337 double t;
338 int id;
340 lock_ctx = talloc_get_type_abort(private_data, struct lock_context);
342 /* cancel the timeout event */
343 TALLOC_FREE(lock_ctx->ttimer);
345 t = timeval_elapsed(&lock_ctx->start_time);
346 id = lock_bucket_id(t);
348 /* Read the status from the child process */
349 if (sys_read(lock_ctx->fd[0], &c, 1) != 1) {
350 locked = false;
351 } else {
352 locked = (c == 0 ? true : false);
355 /* Update statistics */
356 CTDB_INCREMENT_STAT(lock_ctx->ctdb, locks.num_calls);
357 CTDB_INCREMENT_DB_STAT(lock_ctx->ctdb_db, locks.num_calls);
359 if (locked) {
360 CTDB_INCREMENT_STAT(lock_ctx->ctdb, locks.buckets[id]);
361 CTDB_UPDATE_LATENCY(lock_ctx->ctdb, lock_ctx->ctdb_db,
362 lock_type_str[lock_ctx->type], locks.latency,
363 lock_ctx->start_time);
365 CTDB_UPDATE_DB_LATENCY(lock_ctx->ctdb_db, lock_type_str[lock_ctx->type], locks.latency, t);
366 CTDB_INCREMENT_DB_STAT(lock_ctx->ctdb_db, locks.buckets[id]);
367 } else {
368 CTDB_INCREMENT_STAT(lock_ctx->ctdb, locks.num_failed);
369 CTDB_INCREMENT_DB_STAT(lock_ctx->ctdb_db, locks.num_failed);
372 process_callbacks(lock_ctx, locked);
375 struct lock_log_entry {
376 struct db_hash_context *lock_log;
377 TDB_DATA key;
378 unsigned long log_sec;
379 struct tevent_timer *timer;
382 static int lock_log_fetch_parser(uint8_t *keybuf, size_t keylen,
383 uint8_t *databuf, size_t datalen,
384 void *private_data)
386 struct lock_log_entry **entry =
387 (struct lock_log_entry **)private_data;
389 if (datalen != sizeof(struct lock_log_entry *)) {
390 return EINVAL;
393 *entry = talloc_get_type_abort(*(void **)databuf,
394 struct lock_log_entry);
395 return 0;
398 static void lock_log_cleanup(struct tevent_context *ev,
399 struct tevent_timer *ttimer,
400 struct timeval current_time,
401 void *private_data)
403 struct lock_log_entry *entry = talloc_get_type_abort(
404 private_data, struct lock_log_entry);
405 int ret;
407 entry->timer = NULL;
409 ret = db_hash_delete(entry->lock_log, entry->key.dptr,
410 entry->key.dsize);
411 if (ret != 0) {
412 return;
414 talloc_free(entry);
417 static bool lock_log_skip(struct tevent_context *ev,
418 struct db_hash_context *lock_log,
419 TDB_DATA key, unsigned long elapsed_sec)
421 struct lock_log_entry *entry = NULL;
422 int ret;
424 ret = db_hash_fetch(lock_log, key.dptr, key.dsize,
425 lock_log_fetch_parser, &entry);
426 if (ret == ENOENT) {
428 entry = talloc_zero(lock_log, struct lock_log_entry);
429 if (entry == NULL) {
430 goto fail;
433 entry->lock_log = lock_log;
435 entry->key.dptr = talloc_memdup(entry, key.dptr, key.dsize);
436 if (entry->key.dptr == NULL) {
437 talloc_free(entry);
438 goto fail;
440 entry->key.dsize = key.dsize;
442 entry->log_sec = elapsed_sec;
443 entry->timer = tevent_add_timer(ev, entry,
444 timeval_current_ofs(30, 0),
445 lock_log_cleanup, entry);
446 if (entry->timer == NULL) {
447 talloc_free(entry);
448 goto fail;
451 ret = db_hash_add(lock_log, key.dptr, key.dsize,
452 (uint8_t *)&entry,
453 sizeof(struct lock_log_entry *));
454 if (ret != 0) {
455 talloc_free(entry);
456 goto fail;
459 return false;
461 } else if (ret == EINVAL) {
463 ret = db_hash_delete(lock_log, key.dptr, key.dsize);
464 if (ret != 0) {
465 goto fail;
468 return false;
470 } else if (ret == 0) {
472 if (elapsed_sec <= entry->log_sec) {
473 return true;
476 entry->log_sec = elapsed_sec;
478 TALLOC_FREE(entry->timer);
479 entry->timer = tevent_add_timer(ev, entry,
480 timeval_current_ofs(30, 0),
481 lock_log_cleanup, entry);
482 if (entry->timer == NULL) {
483 ret = db_hash_delete(lock_log, key.dptr, key.dsize);
484 if (ret != 0) {
485 goto fail;
487 talloc_free(entry);
490 return false;
494 fail:
495 return false;
500 * Callback routine when required locks are not obtained within timeout
501 * Called from parent context
503 static void ctdb_lock_timeout_handler(struct tevent_context *ev,
504 struct tevent_timer *ttimer,
505 struct timeval current_time,
506 void *private_data)
508 static char debug_locks[PATH_MAX+1] = "";
509 struct lock_context *lock_ctx;
510 struct ctdb_context *ctdb;
511 pid_t pid;
512 double elapsed_time;
513 bool skip;
514 char *keystr;
516 lock_ctx = talloc_get_type_abort(private_data, struct lock_context);
517 ctdb = lock_ctx->ctdb;
519 elapsed_time = timeval_elapsed(&lock_ctx->start_time);
521 /* For database locks, always log */
522 if (lock_ctx->type == LOCK_DB) {
523 DEBUG(DEBUG_WARNING,
524 ("Unable to get DB lock on database %s for "
525 "%.0lf seconds\n",
526 lock_ctx->ctdb_db->db_name, elapsed_time));
527 goto lock_debug;
530 /* For record locks, check if we have already logged */
531 skip = lock_log_skip(ev, lock_ctx->ctdb_db->lock_log,
532 lock_ctx->key, (unsigned long)elapsed_time);
533 if (skip) {
534 goto skip_lock_debug;
537 keystr = hex_encode_talloc(lock_ctx, lock_ctx->key.dptr,
538 lock_ctx->key.dsize);
539 DEBUG(DEBUG_WARNING,
540 ("Unable to get RECORD lock on database %s for %.0lf seconds"
541 " (key %s)\n",
542 lock_ctx->ctdb_db->db_name, elapsed_time,
543 keystr ? keystr : ""));
544 TALLOC_FREE(keystr);
546 /* If a node stopped/banned, don't spam the logs */
547 if (ctdb->nodes[ctdb->pnn]->flags & NODE_FLAGS_INACTIVE) {
548 goto skip_lock_debug;
551 lock_debug:
553 if (ctdb_set_helper("lock debugging helper",
554 debug_locks, sizeof(debug_locks),
555 "CTDB_DEBUG_LOCKS",
556 getenv("CTDB_BASE"), "debug_locks.sh")) {
557 pid = vfork();
558 if (pid == 0) {
559 execl(debug_locks, debug_locks, NULL);
560 _exit(0);
562 ctdb_track_child(ctdb, pid);
563 } else {
564 DEBUG(DEBUG_WARNING,
565 (__location__
566 " Unable to setup lock debugging\n"));
569 skip_lock_debug:
571 /* reset the timeout timer */
572 // talloc_free(lock_ctx->ttimer);
573 lock_ctx->ttimer = tevent_add_timer(ctdb->ev,
574 lock_ctx,
575 timeval_current_ofs(10, 0),
576 ctdb_lock_timeout_handler,
577 (void *)lock_ctx);
580 static bool lock_helper_args(TALLOC_CTX *mem_ctx,
581 struct lock_context *lock_ctx, int fd,
582 int *argc, const char ***argv)
584 const char **args = NULL;
585 int nargs = 0, i;
587 switch (lock_ctx->type) {
588 case LOCK_RECORD:
589 nargs = 6;
590 break;
592 case LOCK_DB:
593 nargs = 5;
594 break;
597 /* Add extra argument for null termination */
598 nargs++;
600 args = talloc_array(mem_ctx, const char *, nargs);
601 if (args == NULL) {
602 return false;
605 args[0] = talloc_asprintf(args, "%d", getpid());
606 args[1] = talloc_asprintf(args, "%d", fd);
608 switch (lock_ctx->type) {
609 case LOCK_RECORD:
610 args[2] = talloc_strdup(args, "RECORD");
611 args[3] = talloc_strdup(args, lock_ctx->ctdb_db->db_path);
612 args[4] = talloc_asprintf(args, "0x%x",
613 tdb_get_flags(lock_ctx->ctdb_db->ltdb->tdb));
614 if (lock_ctx->key.dsize == 0) {
615 args[5] = talloc_strdup(args, "NULL");
616 } else {
617 args[5] = hex_encode_talloc(args, lock_ctx->key.dptr, lock_ctx->key.dsize);
619 break;
621 case LOCK_DB:
622 args[2] = talloc_strdup(args, "DB");
623 args[3] = talloc_strdup(args, lock_ctx->ctdb_db->db_path);
624 args[4] = talloc_asprintf(args, "0x%x",
625 tdb_get_flags(lock_ctx->ctdb_db->ltdb->tdb));
626 break;
629 /* Make sure last argument is NULL */
630 args[nargs-1] = NULL;
632 for (i=0; i<nargs-1; i++) {
633 if (args[i] == NULL) {
634 talloc_free(args);
635 return false;
639 *argc = nargs;
640 *argv = args;
641 return true;
645 * Find a lock request that can be scheduled
647 static struct lock_context *ctdb_find_lock_context(struct ctdb_context *ctdb)
649 struct lock_context *lock_ctx, *next_ctx;
650 struct ctdb_db_context *ctdb_db;
652 /* First check if there are database lock requests */
654 for (lock_ctx = ctdb->lock_pending; lock_ctx != NULL;
655 lock_ctx = next_ctx) {
657 if (lock_ctx->request != NULL) {
658 /* Found a lock context with a request */
659 return lock_ctx;
662 next_ctx = lock_ctx->next;
664 DEBUG(DEBUG_INFO, ("Removing lock context without lock "
665 "request\n"));
666 DLIST_REMOVE(ctdb->lock_pending, lock_ctx);
667 CTDB_DECREMENT_STAT(ctdb, locks.num_pending);
668 CTDB_DECREMENT_DB_STAT(lock_ctx->ctdb_db, locks.num_pending);
669 talloc_free(lock_ctx);
672 /* Next check database queues */
673 for (ctdb_db = ctdb->db_list; ctdb_db; ctdb_db = ctdb_db->next) {
674 if (ctdb_db->lock_num_current ==
675 ctdb->tunable.lock_processes_per_db) {
676 continue;
679 for (lock_ctx = ctdb_db->lock_pending; lock_ctx != NULL;
680 lock_ctx = next_ctx) {
682 next_ctx = lock_ctx->next;
684 if (lock_ctx->request != NULL) {
685 return lock_ctx;
688 DEBUG(DEBUG_INFO, ("Removing lock context without "
689 "lock request\n"));
690 DLIST_REMOVE(ctdb_db->lock_pending, lock_ctx);
691 CTDB_DECREMENT_STAT(ctdb, locks.num_pending);
692 CTDB_DECREMENT_DB_STAT(ctdb_db, locks.num_pending);
693 talloc_free(lock_ctx);
697 return NULL;
701 * Schedule a new lock child process
702 * Set up callback handler and timeout handler
704 static void ctdb_lock_schedule(struct ctdb_context *ctdb)
706 struct lock_context *lock_ctx;
707 int ret, argc;
708 TALLOC_CTX *tmp_ctx;
709 static char prog[PATH_MAX+1] = "";
710 const char **args;
712 if (!ctdb_set_helper("lock helper",
713 prog, sizeof(prog),
714 "CTDB_LOCK_HELPER",
715 CTDB_HELPER_BINDIR, "ctdb_lock_helper")) {
716 ctdb_die(ctdb, __location__
717 " Unable to set lock helper\n");
720 /* Find a lock context with requests */
721 lock_ctx = ctdb_find_lock_context(ctdb);
722 if (lock_ctx == NULL) {
723 return;
726 lock_ctx->child = -1;
727 ret = pipe(lock_ctx->fd);
728 if (ret != 0) {
729 DEBUG(DEBUG_ERR, ("Failed to create pipe in ctdb_lock_schedule\n"));
730 return;
733 set_close_on_exec(lock_ctx->fd[0]);
735 /* Create data for child process */
736 tmp_ctx = talloc_new(lock_ctx);
737 if (tmp_ctx == NULL) {
738 DEBUG(DEBUG_ERR, ("Failed to allocate memory for helper args\n"));
739 close(lock_ctx->fd[0]);
740 close(lock_ctx->fd[1]);
741 return;
744 if (! ctdb->do_setsched) {
745 ret = setenv("CTDB_NOSETSCHED", "1", 1);
746 if (ret != 0) {
747 DEBUG(DEBUG_WARNING,
748 ("Failed to set CTDB_NOSETSCHED variable\n"));
752 /* Create arguments for lock helper */
753 if (!lock_helper_args(tmp_ctx, lock_ctx, lock_ctx->fd[1],
754 &argc, &args)) {
755 DEBUG(DEBUG_ERR, ("Failed to create lock helper args\n"));
756 close(lock_ctx->fd[0]);
757 close(lock_ctx->fd[1]);
758 talloc_free(tmp_ctx);
759 return;
762 lock_ctx->child = ctdb_vfork_exec(lock_ctx, ctdb, prog, argc,
763 (const char **)args);
764 if (lock_ctx->child == -1) {
765 DEBUG(DEBUG_ERR, ("Failed to create a child in ctdb_lock_schedule\n"));
766 close(lock_ctx->fd[0]);
767 close(lock_ctx->fd[1]);
768 talloc_free(tmp_ctx);
769 return;
772 /* Parent process */
773 close(lock_ctx->fd[1]);
775 talloc_free(tmp_ctx);
777 /* Set up timeout handler */
778 lock_ctx->ttimer = tevent_add_timer(ctdb->ev,
779 lock_ctx,
780 timeval_current_ofs(10, 0),
781 ctdb_lock_timeout_handler,
782 (void *)lock_ctx);
783 if (lock_ctx->ttimer == NULL) {
784 ctdb_kill(ctdb, lock_ctx->child, SIGTERM);
785 lock_ctx->child = -1;
786 close(lock_ctx->fd[0]);
787 return;
790 /* Set up callback */
791 lock_ctx->tfd = tevent_add_fd(ctdb->ev,
792 lock_ctx,
793 lock_ctx->fd[0],
794 TEVENT_FD_READ,
795 ctdb_lock_handler,
796 (void *)lock_ctx);
797 if (lock_ctx->tfd == NULL) {
798 TALLOC_FREE(lock_ctx->ttimer);
799 ctdb_kill(ctdb, lock_ctx->child, SIGTERM);
800 lock_ctx->child = -1;
801 close(lock_ctx->fd[0]);
802 return;
804 tevent_fd_set_auto_close(lock_ctx->tfd);
806 /* Move the context from pending to current */
807 if (lock_ctx->type == LOCK_RECORD) {
808 DLIST_REMOVE(lock_ctx->ctdb_db->lock_pending, lock_ctx);
809 DLIST_ADD_END(lock_ctx->ctdb_db->lock_current, lock_ctx);
810 } else {
811 DLIST_REMOVE(ctdb->lock_pending, lock_ctx);
812 DLIST_ADD_END(ctdb->lock_current, lock_ctx);
814 CTDB_DECREMENT_STAT(lock_ctx->ctdb, locks.num_pending);
815 CTDB_INCREMENT_STAT(lock_ctx->ctdb, locks.num_current);
816 lock_ctx->ctdb_db->lock_num_current++;
817 CTDB_DECREMENT_DB_STAT(lock_ctx->ctdb_db, locks.num_pending);
818 CTDB_INCREMENT_DB_STAT(lock_ctx->ctdb_db, locks.num_current);
823 * Lock record / db depending on type
825 static struct lock_request *ctdb_lock_internal(TALLOC_CTX *mem_ctx,
826 struct ctdb_context *ctdb,
827 struct ctdb_db_context *ctdb_db,
828 TDB_DATA key,
829 uint32_t priority,
830 void (*callback)(void *, bool),
831 void *private_data,
832 enum lock_type type,
833 bool auto_mark)
835 struct lock_context *lock_ctx = NULL;
836 struct lock_request *request;
838 if (callback == NULL) {
839 DEBUG(DEBUG_WARNING, ("No callback function specified, not locking\n"));
840 return NULL;
843 lock_ctx = talloc_zero(ctdb, struct lock_context);
844 if (lock_ctx == NULL) {
845 DEBUG(DEBUG_ERR, ("Failed to create a new lock context\n"));
846 return NULL;
849 if ((request = talloc_zero(mem_ctx, struct lock_request)) == NULL) {
850 talloc_free(lock_ctx);
851 return NULL;
854 lock_ctx->type = type;
855 lock_ctx->ctdb = ctdb;
856 lock_ctx->ctdb_db = ctdb_db;
857 lock_ctx->key.dsize = key.dsize;
858 if (key.dsize > 0) {
859 lock_ctx->key.dptr = talloc_memdup(lock_ctx, key.dptr, key.dsize);
860 if (lock_ctx->key.dptr == NULL) {
861 DEBUG(DEBUG_ERR, (__location__ "Memory allocation error\n"));
862 talloc_free(lock_ctx);
863 talloc_free(request);
864 return NULL;
866 lock_ctx->key_hash = ctdb_hash(&key);
867 } else {
868 lock_ctx->key.dptr = NULL;
870 lock_ctx->priority = priority;
871 lock_ctx->auto_mark = auto_mark;
873 lock_ctx->request = request;
874 lock_ctx->child = -1;
876 /* Non-record locks are required by recovery and should be scheduled
877 * immediately, so keep them at the head of the pending queue.
879 if (lock_ctx->type == LOCK_RECORD) {
880 DLIST_ADD_END(ctdb_db->lock_pending, lock_ctx);
881 } else {
882 DLIST_ADD_END(ctdb->lock_pending, lock_ctx);
884 CTDB_INCREMENT_STAT(ctdb, locks.num_pending);
885 if (ctdb_db) {
886 CTDB_INCREMENT_DB_STAT(ctdb_db, locks.num_pending);
889 /* Start the timer when we activate the context */
890 lock_ctx->start_time = timeval_current();
892 request->lctx = lock_ctx;
893 request->callback = callback;
894 request->private_data = private_data;
896 talloc_set_destructor(request, ctdb_lock_request_destructor);
897 talloc_set_destructor(lock_ctx, ctdb_lock_context_destructor);
899 ctdb_lock_schedule(ctdb);
901 return request;
906 * obtain a lock on a record in a database
908 struct lock_request *ctdb_lock_record(TALLOC_CTX *mem_ctx,
909 struct ctdb_db_context *ctdb_db,
910 TDB_DATA key,
911 bool auto_mark,
912 void (*callback)(void *, bool),
913 void *private_data)
915 return ctdb_lock_internal(mem_ctx,
916 ctdb_db->ctdb,
917 ctdb_db,
918 key,
920 callback,
921 private_data,
922 LOCK_RECORD,
923 auto_mark);
928 * obtain a lock on a database
930 struct lock_request *ctdb_lock_db(TALLOC_CTX *mem_ctx,
931 struct ctdb_db_context *ctdb_db,
932 bool auto_mark,
933 void (*callback)(void *, bool),
934 void *private_data)
936 return ctdb_lock_internal(mem_ctx,
937 ctdb_db->ctdb,
938 ctdb_db,
939 tdb_null,
941 callback,
942 private_data,
943 LOCK_DB,
944 auto_mark);