libndr: add support for relative_rap_convert.
[Samba/ekacnet.git] / source3 / lib / g_lock.c
blobd7828e228f383270a9a1b78b50b7c0bd8ecd127a
1 /*
2 Unix SMB/CIFS implementation.
3 global locks based on dbwrap and messaging
4 Copyright (C) 2009 by Volker Lendecke
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <http://www.gnu.org/licenses/>.
20 #include "includes.h"
21 #include "g_lock.h"
22 #include "librpc/gen_ndr/messaging.h"
24 static NTSTATUS g_lock_force_unlock(struct g_lock_ctx *ctx, const char *name,
25 struct server_id pid);
/*
 * Handle for the global lock subsystem, created by g_lock_ctx_init().
 */
struct g_lock_ctx {
	/* Lock database, opened on "g_lock.tdb" */
	struct db_context *db;
	/* Messaging context used for MSG_DBWRAP_G_LOCK_RETRY traffic */
	struct messaging_context *msg;
};
33 * The "g_lock.tdb" file contains records, indexed by the 0-terminated
34 * lockname. The record contains an array of "struct g_lock_rec"
35 * structures. Waiters have the lock_type with G_LOCK_PENDING or'ed.
38 struct g_lock_rec {
39 enum g_lock_type lock_type;
40 struct server_id pid;
43 struct g_lock_ctx *g_lock_ctx_init(TALLOC_CTX *mem_ctx,
44 struct messaging_context *msg)
46 struct g_lock_ctx *result;
48 result = talloc(mem_ctx, struct g_lock_ctx);
49 if (result == NULL) {
50 return NULL;
52 result->msg = msg;
54 result->db = db_open(result, lock_path("g_lock.tdb"), 0,
55 TDB_CLEAR_IF_FIRST, O_RDWR|O_CREAT, 0700);
56 if (result->db == NULL) {
57 DEBUG(1, ("g_lock_init: Could not open g_lock.tdb"));
58 TALLOC_FREE(result);
59 return NULL;
61 return result;
64 static bool g_lock_conflicts(enum g_lock_type lock_type,
65 const struct g_lock_rec *rec)
67 enum g_lock_type rec_lock = rec->lock_type;
69 if ((rec_lock & G_LOCK_PENDING) != 0) {
70 return false;
74 * Only tested write locks so far. Very likely this routine
75 * needs to be fixed for read locks....
77 if ((lock_type == G_LOCK_READ) && (rec_lock == G_LOCK_READ)) {
78 return false;
80 return true;
83 static bool g_lock_parse(TALLOC_CTX *mem_ctx, TDB_DATA data,
84 int *pnum_locks, struct g_lock_rec **plocks)
86 int i, num_locks;
87 struct g_lock_rec *locks;
89 if ((data.dsize % sizeof(struct g_lock_rec)) != 0) {
90 DEBUG(1, ("invalid lock record length %d\n", (int)data.dsize));
91 return false;
94 num_locks = data.dsize / sizeof(struct g_lock_rec);
95 locks = talloc_array(mem_ctx, struct g_lock_rec, num_locks);
96 if (locks == NULL) {
97 DEBUG(1, ("talloc failed\n"));
98 return false;
101 memcpy(locks, data.dptr, data.dsize);
103 DEBUG(10, ("locks:\n"));
104 for (i=0; i<num_locks; i++) {
105 DEBUGADD(10, ("%s: %s %s\n",
106 procid_str(talloc_tos(), &locks[i].pid),
107 ((locks[i].lock_type & 1) == G_LOCK_READ) ?
108 "read" : "write",
109 (locks[i].lock_type & G_LOCK_PENDING) ?
110 "(pending)" : "(owner)"));
112 if (((locks[i].lock_type & G_LOCK_PENDING) == 0)
113 && !process_exists(locks[i].pid)) {
115 DEBUGADD(10, ("lock owner %s died -- discarding\n",
116 procid_str(talloc_tos(),
117 &locks[i].pid)));
119 if (i < (num_locks-1)) {
120 locks[i] = locks[num_locks-1];
122 num_locks -= 1;
126 *plocks = locks;
127 *pnum_locks = num_locks;
128 return true;
131 static void g_lock_cleanup(int *pnum_locks, struct g_lock_rec *locks)
133 int i, num_locks;
135 num_locks = *pnum_locks;
137 DEBUG(10, ("g_lock_cleanup: %d locks\n", num_locks));
139 for (i=0; i<num_locks; i++) {
140 if (process_exists(locks[i].pid)) {
141 continue;
143 DEBUGADD(10, ("%s does not exist -- discarding\n",
144 procid_str(talloc_tos(), &locks[i].pid)));
146 if (i < (num_locks-1)) {
147 locks[i] = locks[num_locks-1];
149 num_locks -= 1;
151 *pnum_locks = num_locks;
152 return;
155 static struct g_lock_rec *g_lock_addrec(TALLOC_CTX *mem_ctx,
156 struct g_lock_rec *locks,
157 int *pnum_locks,
158 const struct server_id pid,
159 enum g_lock_type lock_type)
161 struct g_lock_rec *result;
162 int num_locks = *pnum_locks;
164 result = talloc_realloc(mem_ctx, locks, struct g_lock_rec,
165 num_locks+1);
166 if (result == NULL) {
167 return NULL;
170 result[num_locks].pid = pid;
171 result[num_locks].lock_type = lock_type;
172 *pnum_locks += 1;
173 return result;
176 static void g_lock_got_retry(struct messaging_context *msg,
177 void *private_data,
178 uint32_t msg_type,
179 struct server_id server_id,
180 DATA_BLOB *data);
182 static NTSTATUS g_lock_trylock(struct g_lock_ctx *ctx, const char *name,
183 enum g_lock_type lock_type)
185 struct db_record *rec = NULL;
186 struct g_lock_rec *locks = NULL;
187 int i, num_locks;
188 struct server_id self;
189 int our_index;
190 TDB_DATA data;
191 NTSTATUS status = NT_STATUS_OK;
192 NTSTATUS store_status;
194 again:
195 rec = ctx->db->fetch_locked(ctx->db, talloc_tos(),
196 string_term_tdb_data(name));
197 if (rec == NULL) {
198 DEBUG(10, ("fetch_locked(\"%s\") failed\n", name));
199 status = NT_STATUS_LOCK_NOT_GRANTED;
200 goto done;
203 if (!g_lock_parse(talloc_tos(), rec->value, &num_locks, &locks)) {
204 DEBUG(10, ("g_lock_parse for %s failed\n", name));
205 status = NT_STATUS_INTERNAL_ERROR;
206 goto done;
209 self = procid_self();
210 our_index = -1;
212 for (i=0; i<num_locks; i++) {
213 if (procid_equal(&self, &locks[i].pid)) {
214 if (our_index != -1) {
215 DEBUG(1, ("g_lock_trylock: Added ourself "
216 "twice!\n"));
217 status = NT_STATUS_INTERNAL_ERROR;
218 goto done;
220 if ((locks[i].lock_type & G_LOCK_PENDING) == 0) {
221 DEBUG(1, ("g_lock_trylock: Found ourself not "
222 "pending!\n"));
223 status = NT_STATUS_INTERNAL_ERROR;
224 goto done;
227 our_index = i;
229 /* never conflict with ourself */
230 continue;
232 if (g_lock_conflicts(lock_type, &locks[i])) {
233 struct server_id pid = locks[i].pid;
235 if (!process_exists(pid)) {
236 TALLOC_FREE(locks);
237 TALLOC_FREE(rec);
238 status = g_lock_force_unlock(ctx, name, pid);
239 if (!NT_STATUS_IS_OK(status)) {
240 DEBUG(1, ("Could not unlock dead lock "
241 "holder!\n"));
242 goto done;
244 goto again;
246 lock_type |= G_LOCK_PENDING;
250 if (our_index == -1) {
251 /* First round, add ourself */
253 locks = g_lock_addrec(talloc_tos(), locks, &num_locks,
254 self, lock_type);
255 if (locks == NULL) {
256 DEBUG(10, ("g_lock_addrec failed\n"));
257 status = NT_STATUS_NO_MEMORY;
258 goto done;
260 } else {
262 * Retry. We were pending last time. Overwrite the
263 * stored lock_type with what we calculated, we might
264 * have acquired the lock this time.
266 locks[our_index].lock_type = lock_type;
269 if (NT_STATUS_IS_OK(status) && ((lock_type & G_LOCK_PENDING) == 0)) {
271 * Walk through the list of locks, search for dead entries
273 g_lock_cleanup(&num_locks, locks);
276 data = make_tdb_data((uint8_t *)locks, num_locks * sizeof(*locks));
277 store_status = rec->store(rec, data, 0);
278 if (!NT_STATUS_IS_OK(store_status)) {
279 DEBUG(1, ("rec->store failed: %s\n",
280 nt_errstr(store_status)));
281 status = store_status;
284 done:
285 TALLOC_FREE(locks);
286 TALLOC_FREE(rec);
288 if (NT_STATUS_IS_OK(status) && (lock_type & G_LOCK_PENDING) != 0) {
289 return STATUS_PENDING;
292 return NT_STATUS_OK;
295 NTSTATUS g_lock_lock(struct g_lock_ctx *ctx, const char *name,
296 enum g_lock_type lock_type, struct timeval timeout)
298 struct tevent_timer *te = NULL;
299 NTSTATUS status;
300 bool retry = false;
301 struct timeval timeout_end;
302 struct timeval time_now;
304 DEBUG(10, ("Trying to acquire lock %d for %s\n", (int)lock_type,
305 name));
307 if (lock_type & ~1) {
308 DEBUG(1, ("Got invalid lock type %d for %s\n",
309 (int)lock_type, name));
310 return NT_STATUS_INVALID_PARAMETER;
313 #ifdef CLUSTER_SUPPORT
314 if (lp_clustering()) {
315 status = ctdb_watch_us(messaging_ctdbd_connection());
316 if (!NT_STATUS_IS_OK(status)) {
317 DEBUG(10, ("could not register retry with ctdb: %s\n",
318 nt_errstr(status)));
319 goto done;
322 #endif
324 status = messaging_register(ctx->msg, &retry, MSG_DBWRAP_G_LOCK_RETRY,
325 g_lock_got_retry);
326 if (!NT_STATUS_IS_OK(status)) {
327 DEBUG(10, ("messaging_register failed: %s\n",
328 nt_errstr(status)));
329 return status;
332 time_now = timeval_current();
333 timeout_end = timeval_sum(&time_now, &timeout);
335 while (true) {
336 #ifdef CLUSTER_SUPPORT
337 fd_set _r_fds;
338 #endif
339 fd_set *r_fds = NULL;
340 int max_fd = 0;
341 int ret;
342 struct timeval timeout_remaining, select_timeout;
344 status = g_lock_trylock(ctx, name, lock_type);
345 if (NT_STATUS_IS_OK(status)) {
346 DEBUG(10, ("Got lock %s\n", name));
347 break;
349 if (!NT_STATUS_EQUAL(status, STATUS_PENDING)) {
350 DEBUG(10, ("g_lock_trylock failed: %s\n",
351 nt_errstr(status)));
352 break;
355 DEBUG(10, ("g_lock_trylock: Did not get lock, waiting...\n"));
357 /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
358 * !!! HACK ALERT --- FIX ME !!!
359 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
360 * What we really want to do here is to react to
361 * MSG_DBWRAP_G_LOCK_RETRY messages that are either sent
362 * by a client doing g_lock_unlock or by ourselves when
363 * we receive a CTDB_SRVID_SAMBA_NOTIFY or
364 * CTDB_SRVID_RECONFIGURE message from ctdbd, i.e. when
365 * either a client holding a lock or a complete node
366 * has died.
368 * Doing this properly involves calling tevent_loop_once(),
369 * but doing this here with the main ctdbd messaging context
370 * creates a nested event loop when g_lock_lock() is called
371 * from the main event loop, e.g. in a tcon_and_X where the
372 * share_info.tdb needs to be initialized and is locked by
373 * another process, or when the remore registry is accessed
374 * for writing and some other process already holds a lock
375 * on the registry.tdb.
377 * So as a quick fix, we act a little coarsely here: we do
378 * a select on the ctdb connection fd and when it is readable
379 * or we get EINTR, then we retry without actually parsing
380 * any ctdb packages or dispatching messages. This means that
381 * we retry more often than intended by design, but this does
382 * not harm and it is unobtrusive. When we have finished,
383 * the main loop will pick up all the messages and ctdb
384 * packets. The only extra twist is that we cannot use timed
385 * events here but have to handcode a timeout.
388 #ifdef CLUSTER_SUPPORT
389 if (lp_clustering()) {
390 struct ctdbd_connection *conn = messaging_ctdbd_connection();
392 r_fds = &_r_fds;
393 FD_ZERO(r_fds);
394 max_fd = ctdbd_conn_get_fd(conn);
395 FD_SET(max_fd, r_fds);
397 #endif
399 time_now = timeval_current();
400 timeout_remaining = timeval_until(&time_now, &timeout_end);
401 select_timeout = timeval_set(60, 0);
403 select_timeout = timeval_min(&select_timeout,
404 &timeout_remaining);
406 ret = sys_select(max_fd + 1, r_fds, NULL, NULL,
407 &select_timeout);
408 if (ret == -1) {
409 if (errno != EINTR) {
410 DEBUG(1, ("error calling select: %s\n",
411 strerror(errno)));
412 status = NT_STATUS_INTERNAL_ERROR;
413 break;
416 * errno == EINTR:
417 * This means a signal was received.
418 * It might have been a MSG_DBWRAP_G_LOCK_RETRY message.
419 * ==> retry
421 } else if (ret == 0) {
422 if (timeval_expired(&timeout_end)) {
423 DEBUG(10, ("g_lock_lock timed out\n"));
424 status = NT_STATUS_LOCK_NOT_GRANTED;
425 break;
426 } else {
427 DEBUG(10, ("select returned 0 but timeout not "
428 "not expired, retrying\n"));
430 } else if (ret != 1) {
431 DEBUG(1, ("invalid return code of select: %d\n", ret));
432 status = NT_STATUS_INTERNAL_ERROR;
433 break;
436 * ret == 1:
437 * This means ctdbd has sent us some data.
438 * Might be a CTDB_SRVID_RECONFIGURE or a
439 * CTDB_SRVID_SAMBA_NOTIFY message.
440 * ==> retry
444 #ifdef CLUSTER_SUPPORT
445 done:
446 #endif
448 if (!NT_STATUS_IS_OK(status)) {
449 NTSTATUS unlock_status;
451 unlock_status = g_lock_unlock(ctx, name);
453 if (!NT_STATUS_IS_OK(unlock_status)) {
454 DEBUG(1, ("Could not remove ourself from the locking "
455 "db: %s\n", nt_errstr(status)));
459 messaging_deregister(ctx->msg, MSG_DBWRAP_G_LOCK_RETRY, &retry);
460 TALLOC_FREE(te);
462 return status;
465 static void g_lock_got_retry(struct messaging_context *msg,
466 void *private_data,
467 uint32_t msg_type,
468 struct server_id server_id,
469 DATA_BLOB *data)
471 bool *pretry = (bool *)private_data;
473 DEBUG(10, ("Got retry message from pid %s\n",
474 procid_str(talloc_tos(), &server_id)));
476 *pretry = true;
479 static NTSTATUS g_lock_force_unlock(struct g_lock_ctx *ctx, const char *name,
480 struct server_id pid)
482 struct db_record *rec = NULL;
483 struct g_lock_rec *locks = NULL;
484 int i, num_locks;
485 enum g_lock_type lock_type;
486 NTSTATUS status;
488 rec = ctx->db->fetch_locked(ctx->db, talloc_tos(),
489 string_term_tdb_data(name));
490 if (rec == NULL) {
491 DEBUG(10, ("fetch_locked(\"%s\") failed\n", name));
492 status = NT_STATUS_INTERNAL_ERROR;
493 goto done;
496 if (!g_lock_parse(talloc_tos(), rec->value, &num_locks, &locks)) {
497 DEBUG(10, ("g_lock_parse for %s failed\n", name));
498 status = NT_STATUS_INTERNAL_ERROR;
499 goto done;
502 for (i=0; i<num_locks; i++) {
503 if (procid_equal(&pid, &locks[i].pid)) {
504 break;
508 if (i == num_locks) {
509 DEBUG(10, ("g_lock_force_unlock: Lock not found\n"));
510 status = NT_STATUS_INTERNAL_ERROR;
511 goto done;
514 lock_type = locks[i].lock_type;
516 if (i < (num_locks-1)) {
517 locks[i] = locks[num_locks-1];
519 num_locks -= 1;
521 if (num_locks == 0) {
522 status = rec->delete_rec(rec);
523 } else {
524 TDB_DATA data;
525 data = make_tdb_data((uint8_t *)locks,
526 sizeof(struct g_lock_rec) * num_locks);
527 status = rec->store(rec, data, 0);
530 if (!NT_STATUS_IS_OK(status)) {
531 DEBUG(1, ("g_lock_force_unlock: Could not store record: %s\n",
532 nt_errstr(status)));
533 goto done;
536 TALLOC_FREE(rec);
538 if ((lock_type & G_LOCK_PENDING) == 0) {
539 int num_wakeups = 0;
542 * We've been the lock holder. Others to retry. Don't
543 * tell all others to avoid a thundering herd. In case
544 * this leads to a complete stall because we miss some
545 * processes, the loop in g_lock_lock tries at least
546 * once a minute.
549 for (i=0; i<num_locks; i++) {
550 if ((locks[i].lock_type & G_LOCK_PENDING) == 0) {
551 continue;
553 if (!process_exists(locks[i].pid)) {
554 continue;
558 * Ping all waiters to retry
560 status = messaging_send(ctx->msg, locks[i].pid,
561 MSG_DBWRAP_G_LOCK_RETRY,
562 &data_blob_null);
563 if (!NT_STATUS_IS_OK(status)) {
564 DEBUG(1, ("sending retry to %s failed: %s\n",
565 procid_str(talloc_tos(),
566 &locks[i].pid),
567 nt_errstr(status)));
568 } else {
569 num_wakeups += 1;
571 if (num_wakeups > 5) {
572 break;
576 done:
578 * For the error path, TALLOC_FREE(rec) as well. In the good
579 * path we have already freed it.
581 TALLOC_FREE(rec);
583 TALLOC_FREE(locks);
584 return status;
587 NTSTATUS g_lock_unlock(struct g_lock_ctx *ctx, const char *name)
589 NTSTATUS status;
591 status = g_lock_force_unlock(ctx, name, procid_self());
593 #ifdef CLUSTER_SUPPORT
594 if (lp_clustering()) {
595 ctdb_unwatch(messaging_ctdbd_connection());
597 #endif
598 return status;
/* Closure handed through the database traverse in g_lock_locks() */
struct g_lock_locks_state {
	/* Caller's per-lockname callback */
	int (*fn)(const char *name, void *private_data);
	/* Opaque pointer passed back to fn */
	void *private_data;
};
606 static int g_lock_locks_fn(struct db_record *rec, void *priv)
608 struct g_lock_locks_state *state = (struct g_lock_locks_state *)priv;
610 if ((rec->key.dsize == 0) || (rec->key.dptr[rec->key.dsize-1] != 0)) {
611 DEBUG(1, ("invalid key in g_lock.tdb, ignoring\n"));
612 return 0;
614 return state->fn((char *)rec->key.dptr, state->private_data);
617 int g_lock_locks(struct g_lock_ctx *ctx,
618 int (*fn)(const char *name, void *private_data),
619 void *private_data)
621 struct g_lock_locks_state state;
623 state.fn = fn;
624 state.private_data = private_data;
626 return ctx->db->traverse_read(ctx->db, g_lock_locks_fn, &state);
629 NTSTATUS g_lock_dump(struct g_lock_ctx *ctx, const char *name,
630 int (*fn)(struct server_id pid,
631 enum g_lock_type lock_type,
632 void *private_data),
633 void *private_data)
635 TDB_DATA data;
636 int i, num_locks;
637 struct g_lock_rec *locks = NULL;
638 bool ret;
640 if (ctx->db->fetch(ctx->db, talloc_tos(), string_term_tdb_data(name),
641 &data) != 0) {
642 return NT_STATUS_NOT_FOUND;
645 if ((data.dsize == 0) || (data.dptr == NULL)) {
646 return NT_STATUS_OK;
649 ret = g_lock_parse(talloc_tos(), data, &num_locks, &locks);
651 TALLOC_FREE(data.dptr);
653 if (!ret) {
654 DEBUG(10, ("g_lock_parse for %s failed\n", name));
655 return NT_STATUS_INTERNAL_ERROR;
658 for (i=0; i<num_locks; i++) {
659 if (fn(locks[i].pid, locks[i].lock_type, private_data) != 0) {
660 break;
663 TALLOC_FREE(locks);
664 return NT_STATUS_OK;
/* Result carrier for g_lock_get(), filled in by g_lock_get_fn() */
struct g_lock_get_state {
	/* Set once a non-pending entry has been seen */
	bool found;
	/* Where to store the pid of that entry */
	struct server_id *pid;
};
672 static int g_lock_get_fn(struct server_id pid, enum g_lock_type lock_type,
673 void *priv)
675 struct g_lock_get_state *state = (struct g_lock_get_state *)priv;
677 if ((lock_type & G_LOCK_PENDING) != 0) {
678 return 0;
681 state->found = true;
682 *state->pid = pid;
683 return 1;
686 NTSTATUS g_lock_get(struct g_lock_ctx *ctx, const char *name,
687 struct server_id *pid)
689 struct g_lock_get_state state;
690 NTSTATUS status;
692 state.found = false;
693 state.pid = pid;
695 status = g_lock_dump(ctx, name, g_lock_get_fn, &state);
696 if (!NT_STATUS_IS_OK(status)) {
697 return status;
699 if (!state.found) {
700 return NT_STATUS_NOT_FOUND;
702 return NT_STATUS_OK;
705 static bool g_lock_init_all(TALLOC_CTX *mem_ctx,
706 struct tevent_context **pev,
707 struct messaging_context **pmsg,
708 struct g_lock_ctx **pg_ctx)
710 struct tevent_context *ev = NULL;
711 struct messaging_context *msg = NULL;
712 struct g_lock_ctx *g_ctx = NULL;
714 ev = tevent_context_init(mem_ctx);
715 if (ev == NULL) {
716 d_fprintf(stderr, "ERROR: could not init event context\n");
717 goto fail;
719 msg = messaging_init(mem_ctx, procid_self(), ev);
720 if (msg == NULL) {
721 d_fprintf(stderr, "ERROR: could not init messaging context\n");
722 goto fail;
724 g_ctx = g_lock_ctx_init(mem_ctx, msg);
725 if (g_ctx == NULL) {
726 d_fprintf(stderr, "ERROR: could not init g_lock context\n");
727 goto fail;
730 *pev = ev;
731 *pmsg = msg;
732 *pg_ctx = g_ctx;
733 return true;
734 fail:
735 TALLOC_FREE(g_ctx);
736 TALLOC_FREE(msg);
737 TALLOC_FREE(ev);
738 return false;
741 NTSTATUS g_lock_do(const char *name, enum g_lock_type lock_type,
742 struct timeval timeout,
743 void (*fn)(void *private_data), void *private_data)
745 struct tevent_context *ev = NULL;
746 struct messaging_context *msg = NULL;
747 struct g_lock_ctx *g_ctx = NULL;
748 NTSTATUS status;
750 if (!g_lock_init_all(talloc_tos(), &ev, &msg, &g_ctx)) {
751 status = NT_STATUS_ACCESS_DENIED;
752 goto done;
755 status = g_lock_lock(g_ctx, name, lock_type, timeout);
756 if (!NT_STATUS_IS_OK(status)) {
757 goto done;
759 fn(private_data);
760 g_lock_unlock(g_ctx, name);
762 done:
763 TALLOC_FREE(g_ctx);
764 TALLOC_FREE(msg);
765 TALLOC_FREE(ev);
766 return status;