s3: Fix a typo (cherry picked from commit bac235dd302570850bb25194ff4bd39b6d653f0d)
[Samba.git] / source3 / lib / g_lock.c
blob26b079d3bc0658018639feb49b2911238e907ad4
1 /*
2 Unix SMB/CIFS implementation.
3 global locks based on dbwrap and messaging
4 Copyright (C) 2009 by Volker Lendecke
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <http://www.gnu.org/licenses/>.
20 #include "includes.h"
21 #include "g_lock.h"
23 static NTSTATUS g_lock_force_unlock(struct g_lock_ctx *ctx, const char *name,
24 struct server_id pid);
/*
 * Handle returned by g_lock_ctx_init() and passed to every other
 * g_lock_* function in this file.
 */
struct g_lock_ctx {
	/* Lock database, opened from "g_lock.tdb" by g_lock_ctx_init(). */
	struct db_context *db;
	/* Messaging context used to register for and send retry messages. */
	struct messaging_context *msg;
};
32 * The "g_lock.tdb" file contains records, indexed by the 0-terminated
33 * lockname. The record contains an array of "struct g_lock_rec"
34 * structures. Waiters have the lock_type with G_LOCK_PENDING or'ed.
37 struct g_lock_rec {
38 enum g_lock_type lock_type;
39 struct server_id pid;
42 struct g_lock_ctx *g_lock_ctx_init(TALLOC_CTX *mem_ctx,
43 struct messaging_context *msg)
45 struct g_lock_ctx *result;
47 result = talloc(mem_ctx, struct g_lock_ctx);
48 if (result == NULL) {
49 return NULL;
51 result->msg = msg;
53 result->db = db_open(result, lock_path("g_lock.tdb"), 0,
54 TDB_CLEAR_IF_FIRST, O_RDWR|O_CREAT, 0700);
55 if (result->db == NULL) {
56 DEBUG(1, ("g_lock_init: Could not open g_lock.tdb"));
57 TALLOC_FREE(result);
58 return NULL;
60 return result;
63 static bool g_lock_conflicts(enum g_lock_type lock_type,
64 const struct g_lock_rec *rec)
66 enum g_lock_type rec_lock = rec->lock_type;
68 if ((rec_lock & G_LOCK_PENDING) != 0) {
69 return false;
73 * Only tested write locks so far. Very likely this routine
74 * needs to be fixed for read locks....
76 if ((lock_type == G_LOCK_READ) && (rec_lock == G_LOCK_READ)) {
77 return false;
79 return true;
82 static bool g_lock_parse(TALLOC_CTX *mem_ctx, TDB_DATA data,
83 int *pnum_locks, struct g_lock_rec **plocks)
85 int i, num_locks;
86 struct g_lock_rec *locks;
88 if ((data.dsize % sizeof(struct g_lock_rec)) != 0) {
89 DEBUG(1, ("invalid lock record length %d\n", (int)data.dsize));
90 return false;
93 num_locks = data.dsize / sizeof(struct g_lock_rec);
94 locks = talloc_array(mem_ctx, struct g_lock_rec, num_locks);
95 if (locks == NULL) {
96 DEBUG(1, ("talloc failed\n"));
97 return false;
100 memcpy(locks, data.dptr, data.dsize);
102 DEBUG(10, ("locks:\n"));
103 for (i=0; i<num_locks; i++) {
104 DEBUGADD(10, ("%s: %s %s\n",
105 procid_str(talloc_tos(), &locks[i].pid),
106 ((locks[i].lock_type & 1) == G_LOCK_READ) ?
107 "read" : "write",
108 (locks[i].lock_type & G_LOCK_PENDING) ?
109 "(pending)" : "(owner)"));
111 if (process_exists(locks[i].pid)) {
112 continue;
114 DEBUGADD(10, ("%s does not exist -- discarding\n",
115 procid_str(talloc_tos(), &locks[i].pid)));
117 if (i < (num_locks-1)) {
118 locks[i] = locks[num_locks-1];
120 num_locks -= 1;
123 *plocks = locks;
124 *pnum_locks = num_locks;
125 return true;
128 static struct g_lock_rec *g_lock_addrec(TALLOC_CTX *mem_ctx,
129 struct g_lock_rec *locks,
130 int num_locks,
131 const struct server_id pid,
132 enum g_lock_type lock_type)
134 struct g_lock_rec *result;
136 result = talloc_realloc(mem_ctx, locks, struct g_lock_rec,
137 num_locks+1);
138 if (result == NULL) {
139 return NULL;
142 result[num_locks].pid = pid;
143 result[num_locks].lock_type = lock_type;
144 return result;
147 static void g_lock_got_retry(struct messaging_context *msg,
148 void *private_data,
149 uint32_t msg_type,
150 struct server_id server_id,
151 DATA_BLOB *data);
153 static NTSTATUS g_lock_trylock(struct g_lock_ctx *ctx, const char *name,
154 enum g_lock_type lock_type)
156 struct db_record *rec = NULL;
157 struct g_lock_rec *locks = NULL;
158 int i, num_locks;
159 struct server_id self;
160 int our_index;
161 TDB_DATA data;
162 NTSTATUS status = NT_STATUS_OK;
163 NTSTATUS store_status;
165 again:
166 rec = ctx->db->fetch_locked(ctx->db, talloc_tos(),
167 string_term_tdb_data(name));
168 if (rec == NULL) {
169 DEBUG(10, ("fetch_locked(\"%s\") failed\n", name));
170 status = NT_STATUS_LOCK_NOT_GRANTED;
171 goto done;
174 if (!g_lock_parse(talloc_tos(), rec->value, &num_locks, &locks)) {
175 DEBUG(10, ("g_lock_parse for %s failed\n", name));
176 status = NT_STATUS_INTERNAL_ERROR;
177 goto done;
180 self = procid_self();
181 our_index = -1;
183 for (i=0; i<num_locks; i++) {
184 if (procid_equal(&self, &locks[i].pid)) {
185 if (our_index != -1) {
186 DEBUG(1, ("g_lock_trylock: Added ourself "
187 "twice!\n"));
188 status = NT_STATUS_INTERNAL_ERROR;
189 goto done;
191 if ((locks[i].lock_type & G_LOCK_PENDING) == 0) {
192 DEBUG(1, ("g_lock_trylock: Found ourself not "
193 "pending!\n"));
194 status = NT_STATUS_INTERNAL_ERROR;
195 goto done;
198 our_index = i;
200 /* never conflict with ourself */
201 continue;
203 if (g_lock_conflicts(lock_type, &locks[i])) {
204 struct server_id pid = locks[i].pid;
206 if (!process_exists(pid)) {
207 TALLOC_FREE(locks);
208 TALLOC_FREE(rec);
209 status = g_lock_force_unlock(ctx, name, pid);
210 if (!NT_STATUS_IS_OK(status)) {
211 DEBUG(1, ("Could not unlock dead lock "
212 "holder!\n"));
213 goto done;
215 goto again;
217 lock_type |= G_LOCK_PENDING;
221 if (our_index == -1) {
222 /* First round, add ourself */
224 locks = g_lock_addrec(talloc_tos(), locks, num_locks,
225 self, lock_type);
226 if (locks == NULL) {
227 DEBUG(10, ("g_lock_addrec failed\n"));
228 status = NT_STATUS_NO_MEMORY;
229 goto done;
231 } else {
233 * Retry. We were pending last time. Overwrite the
234 * stored lock_type with what we calculated, we might
235 * have acquired the lock this time.
237 locks[our_index].lock_type = lock_type;
240 data = make_tdb_data((uint8_t *)locks, talloc_get_size(locks));
241 store_status = rec->store(rec, data, 0);
242 if (!NT_STATUS_IS_OK(store_status)) {
243 DEBUG(1, ("rec->store failed: %s\n",
244 nt_errstr(store_status)));
245 status = store_status;
248 done:
249 TALLOC_FREE(locks);
250 TALLOC_FREE(rec);
252 if (NT_STATUS_IS_OK(status) && (lock_type & G_LOCK_PENDING) != 0) {
253 return STATUS_PENDING;
256 return NT_STATUS_OK;
259 NTSTATUS g_lock_lock(struct g_lock_ctx *ctx, const char *name,
260 enum g_lock_type lock_type, struct timeval timeout)
262 struct tevent_timer *te = NULL;
263 NTSTATUS status;
264 bool retry = false;
265 struct timeval timeout_end;
266 struct timeval timeout_remaining;
267 struct timeval time_now;
269 DEBUG(10, ("Trying to acquire lock %d for %s\n", (int)lock_type,
270 name));
272 if (lock_type & ~1) {
273 DEBUG(1, ("Got invalid lock type %d for %s\n",
274 (int)lock_type, name));
275 return NT_STATUS_INVALID_PARAMETER;
278 #ifdef CLUSTER_SUPPORT
279 if (lp_clustering()) {
280 status = ctdb_watch_us(messaging_ctdbd_connection());
281 if (!NT_STATUS_IS_OK(status)) {
282 DEBUG(10, ("could not register retry with ctdb: %s\n",
283 nt_errstr(status)));
284 goto done;
287 #endif
289 status = messaging_register(ctx->msg, &retry, MSG_DBWRAP_G_LOCK_RETRY,
290 g_lock_got_retry);
291 if (!NT_STATUS_IS_OK(status)) {
292 DEBUG(10, ("messaging_register failed: %s\n",
293 nt_errstr(status)));
294 return status;
297 time_now = timeval_current();
298 timeout_end = timeval_sum(&time_now, &timeout);
300 while (true) {
301 #ifdef CLUSTER_SUPPORT
302 fd_set _r_fds;
303 #endif
304 fd_set *r_fds = NULL;
305 int max_fd = 0;
306 int ret;
308 status = g_lock_trylock(ctx, name, lock_type);
309 if (NT_STATUS_IS_OK(status)) {
310 DEBUG(10, ("Got lock %s\n", name));
311 break;
313 if (!NT_STATUS_EQUAL(status, STATUS_PENDING)) {
314 DEBUG(10, ("g_lock_trylock failed: %s\n",
315 nt_errstr(status)));
316 break;
319 DEBUG(10, ("g_lock_trylock: Did not get lock, waiting...\n"));
321 /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
322 * !!! HACK ALERT --- FIX ME !!!
323 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
324 * What we really want to do here is to react to
325 * MSG_DBWRAP_G_LOCK_RETRY messages that are either sent
326 * by a client doing g_lock_unlock or by ourselves when
327 * we receive a CTDB_SRVID_SAMBA_NOTIFY or
328 * CTDB_SRVID_RECONFIGURE message from ctdbd, i.e. when
329 * either a client holding a lock or a complete node
330 * has died.
332 * Doing this properly involves calling tevent_loop_once(),
333 * but doing this here with the main ctdbd messaging context
334 * creates a nested event loop when g_lock_lock() is called
335 * from the main event loop, e.g. in a tcon_and_X where the
336 * share_info.tdb needs to be initialized and is locked by
337 * another process, or when the remore registry is accessed
338 * for writing and some other process already holds a lock
339 * on the registry.tdb.
341 * So as a quick fix, we act a little coarsely here: we do
342 * a select on the ctdb connection fd and when it is readable
343 * or we get EINTR, then we retry without actually parsing
344 * any ctdb packages or dispatching messages. This means that
345 * we retry more often than intended by design, but this does
346 * not harm and it is unobtrusive. When we have finished,
347 * the main loop will pick up all the messages and ctdb
348 * packets. The only extra twist is that we cannot use timed
349 * events here but have to handcode a timeout.
352 #ifdef CLUSTER_SUPPORT
353 if (lp_clustering()) {
354 struct ctdbd_connection *conn = messaging_ctdbd_connection();
356 r_fds = &_r_fds;
357 FD_ZERO(r_fds);
358 max_fd = ctdbd_conn_get_fd(conn);
359 FD_SET(max_fd, r_fds);
361 #endif
363 time_now = timeval_current();
364 timeout_remaining = timeval_until(&time_now, &timeout_end);
366 ret = sys_select(max_fd + 1, r_fds, NULL, NULL,
367 &timeout_remaining);
369 if (ret == -1) {
370 if (errno != EINTR) {
371 DEBUG(1, ("error calling select: %s\n",
372 strerror(errno)));
373 status = NT_STATUS_INTERNAL_ERROR;
374 break;
377 * errno == EINTR:
378 * This means a signal was received.
379 * It might have been a MSG_DBWRAP_G_LOCK_RETRY message.
380 * ==> retry
382 } else if (ret == 0) {
383 if (timeval_expired(&timeout_end)) {
384 DEBUG(10, ("g_lock_lock timed out\n"));
385 status = NT_STATUS_LOCK_NOT_GRANTED;
386 break;
387 } else {
388 DEBUG(10, ("select returned 0 but timeout not "
389 "not expired: strange - retrying\n"));
391 } else if (ret != 1) {
392 DEBUG(1, ("invalid return code of select: %d\n", ret));
393 status = NT_STATUS_INTERNAL_ERROR;
394 break;
397 * ret == 1:
398 * This means ctdbd has sent us some data.
399 * Might be a CTDB_SRVID_RECONFIGURE or a
400 * CTDB_SRVID_SAMBA_NOTIFY message.
401 * ==> retry
405 #ifdef CLUSTER_SUPPORT
406 done:
407 #endif
409 if (!NT_STATUS_IS_OK(status)) {
410 NTSTATUS unlock_status;
412 unlock_status = g_lock_unlock(ctx, name);
414 if (!NT_STATUS_IS_OK(unlock_status)) {
415 DEBUG(1, ("Could not remove ourself from the locking "
416 "db: %s\n", nt_errstr(status)));
420 messaging_deregister(ctx->msg, MSG_DBWRAP_G_LOCK_RETRY, &retry);
421 TALLOC_FREE(te);
423 return status;
426 static void g_lock_got_retry(struct messaging_context *msg,
427 void *private_data,
428 uint32_t msg_type,
429 struct server_id server_id,
430 DATA_BLOB *data)
432 bool *pretry = (bool *)private_data;
434 DEBUG(10, ("Got retry message from pid %s\n",
435 procid_str(talloc_tos(), &server_id)));
437 *pretry = true;
440 static NTSTATUS g_lock_force_unlock(struct g_lock_ctx *ctx, const char *name,
441 struct server_id pid)
443 struct db_record *rec = NULL;
444 struct g_lock_rec *locks = NULL;
445 int i, num_locks;
446 enum g_lock_type lock_type;
447 NTSTATUS status;
449 rec = ctx->db->fetch_locked(ctx->db, talloc_tos(),
450 string_term_tdb_data(name));
451 if (rec == NULL) {
452 DEBUG(10, ("fetch_locked(\"%s\") failed\n", name));
453 status = NT_STATUS_INTERNAL_ERROR;
454 goto done;
457 if (!g_lock_parse(talloc_tos(), rec->value, &num_locks, &locks)) {
458 DEBUG(10, ("g_lock_parse for %s failed\n", name));
459 status = NT_STATUS_INTERNAL_ERROR;
460 goto done;
463 for (i=0; i<num_locks; i++) {
464 if (procid_equal(&pid, &locks[i].pid)) {
465 break;
469 if (i == num_locks) {
470 DEBUG(10, ("g_lock_force_unlock: Lock not found\n"));
471 status = NT_STATUS_INTERNAL_ERROR;
472 goto done;
475 lock_type = locks[i].lock_type;
477 if (i < (num_locks-1)) {
478 locks[i] = locks[num_locks-1];
480 num_locks -= 1;
482 if (num_locks == 0) {
483 status = rec->delete_rec(rec);
484 } else {
485 TDB_DATA data;
486 data = make_tdb_data((uint8_t *)locks,
487 sizeof(struct g_lock_rec) * num_locks);
488 status = rec->store(rec, data, 0);
491 if (!NT_STATUS_IS_OK(status)) {
492 DEBUG(1, ("g_lock_force_unlock: Could not store record: %s\n",
493 nt_errstr(status)));
494 goto done;
497 if ((lock_type & G_LOCK_PENDING) == 0) {
499 * We've been the lock holder. Tell all others to retry.
501 for (i=0; i<num_locks; i++) {
502 if ((locks[i].lock_type & G_LOCK_PENDING) == 0) {
503 continue;
507 * Ping all waiters to retry
509 status = messaging_send(ctx->msg, locks[i].pid,
510 MSG_DBWRAP_G_LOCK_RETRY,
511 &data_blob_null);
512 if (!NT_STATUS_IS_OK(status)) {
513 DEBUG(1, ("sending retry to %s failed: %s\n",
514 procid_str(talloc_tos(),
515 &locks[i].pid),
516 nt_errstr(status)));
520 done:
522 TALLOC_FREE(locks);
523 TALLOC_FREE(rec);
524 return status;
527 NTSTATUS g_lock_unlock(struct g_lock_ctx *ctx, const char *name)
529 NTSTATUS status;
531 status = g_lock_force_unlock(ctx, name, procid_self());
533 #ifdef CLUSTER_SUPPORT
534 if (lp_clustering()) {
535 ctdb_unwatch(messaging_ctdbd_connection());
537 #endif
538 return status;
/*
 * Traverse state for g_lock_locks(): the caller's callback and the
 * opaque pointer that is handed back to it.
 */
struct g_lock_locks_state {
	/* Invoked once per lock name by g_lock_locks_fn(). */
	int (*fn)(const char *name, void *private_data);
	/* Passed through to fn unchanged. */
	void *private_data;
};
546 static int g_lock_locks_fn(struct db_record *rec, void *priv)
548 struct g_lock_locks_state *state = (struct g_lock_locks_state *)priv;
550 if ((rec->key.dsize == 0) || (rec->key.dptr[rec->key.dsize-1] != 0)) {
551 DEBUG(1, ("invalid key in g_lock.tdb, ignoring\n"));
552 return 0;
554 return state->fn((char *)rec->key.dptr, state->private_data);
557 int g_lock_locks(struct g_lock_ctx *ctx,
558 int (*fn)(const char *name, void *private_data),
559 void *private_data)
561 struct g_lock_locks_state state;
563 state.fn = fn;
564 state.private_data = private_data;
566 return ctx->db->traverse_read(ctx->db, g_lock_locks_fn, &state);
569 NTSTATUS g_lock_dump(struct g_lock_ctx *ctx, const char *name,
570 int (*fn)(struct server_id pid,
571 enum g_lock_type lock_type,
572 void *private_data),
573 void *private_data)
575 TDB_DATA data;
576 int i, num_locks;
577 struct g_lock_rec *locks = NULL;
578 bool ret;
580 if (ctx->db->fetch(ctx->db, talloc_tos(), string_term_tdb_data(name),
581 &data) != 0) {
582 return NT_STATUS_NOT_FOUND;
585 if ((data.dsize == 0) || (data.dptr == NULL)) {
586 return NT_STATUS_OK;
589 ret = g_lock_parse(talloc_tos(), data, &num_locks, &locks);
591 TALLOC_FREE(data.dptr);
593 if (!ret) {
594 DEBUG(10, ("g_lock_parse for %s failed\n", name));
595 return NT_STATUS_INTERNAL_ERROR;
598 for (i=0; i<num_locks; i++) {
599 if (fn(locks[i].pid, locks[i].lock_type, private_data) != 0) {
600 break;
603 TALLOC_FREE(locks);
604 return NT_STATUS_OK;
/*
 * State shared between g_lock_get() and its callback g_lock_get_fn().
 */
struct g_lock_get_state {
	/* Set to true once a non-pending entry has been seen. */
	bool found;
	/* Where to store the pid of that entry. */
	struct server_id *pid;
};
612 static int g_lock_get_fn(struct server_id pid, enum g_lock_type lock_type,
613 void *priv)
615 struct g_lock_get_state *state = (struct g_lock_get_state *)priv;
617 if ((lock_type & G_LOCK_PENDING) != 0) {
618 return 0;
621 state->found = true;
622 *state->pid = pid;
623 return 1;
626 NTSTATUS g_lock_get(struct g_lock_ctx *ctx, const char *name,
627 struct server_id *pid)
629 struct g_lock_get_state state;
630 NTSTATUS status;
632 state.found = false;
633 state.pid = pid;
635 status = g_lock_dump(ctx, name, g_lock_get_fn, &state);
636 if (!NT_STATUS_IS_OK(status)) {
637 return status;
639 if (!state.found) {
640 return NT_STATUS_NOT_FOUND;
642 return NT_STATUS_OK;