s3: Implement global locks in a g_lock tdb
[Samba/kamenim.git] / source3 / lib / g_lock.c
blob6508b3995b02c48c9d5b805969d4bb84f1c80288
1 /*
2 Unix SMB/CIFS implementation.
3 global locks based on dbwrap and messaging
4 Copyright (C) 2009 by Volker Lendecke
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <http://www.gnu.org/licenses/>.
20 #include "includes.h"
21 #include "g_lock.h"
23 static NTSTATUS g_lock_force_unlock(struct g_lock_ctx *ctx, const char *name,
24 struct server_id pid);
/*
 * Handle for the global lock subsystem, created by g_lock_ctx_init().
 */
struct g_lock_ctx {
	/* database opened from "g_lock.tdb", holds the lock records */
	struct db_context *db;
	/* messaging context used to send and receive retry messages */
	struct messaging_context *msg;
};
32 * The "g_lock.tdb" file contains records, indexed by the 0-terminated
33 * lockname. The record contains an array of "struct g_lock_rec"
34 * structures. Waiters have the lock_type with G_LOCK_PENDING or'ed.
37 struct g_lock_rec {
38 enum g_lock_type lock_type;
39 struct server_id pid;
42 struct g_lock_ctx *g_lock_ctx_init(TALLOC_CTX *mem_ctx,
43 struct messaging_context *msg)
45 struct g_lock_ctx *result;
47 result = talloc(mem_ctx, struct g_lock_ctx);
48 if (result == NULL) {
49 return NULL;
51 result->msg = msg;
53 result->db = db_open(result, lock_path("g_lock.tdb"), 0,
54 TDB_CLEAR_IF_FIRST, O_RDWR|O_CREAT, 0700);
55 if (result->db == NULL) {
56 DEBUG(1, ("g_lock_init: Could not open g_lock.tdb"));
57 TALLOC_FREE(result);
58 return NULL;
60 return result;
63 static bool g_lock_conflicts(enum g_lock_type lock_type,
64 const struct g_lock_rec *rec)
66 enum g_lock_type rec_lock = rec->lock_type;
68 if ((rec_lock & G_LOCK_PENDING) != 0) {
69 return false;
73 * Only tested write locks so far. Very likely this routine
74 * needs to be fixed for read locks....
76 if ((lock_type == G_LOCK_READ) && (rec_lock == G_LOCK_READ)) {
77 return false;
79 return true;
82 static bool g_lock_parse(TALLOC_CTX *mem_ctx, TDB_DATA data,
83 int *pnum_locks, struct g_lock_rec **plocks)
85 int i, num_locks;
86 struct g_lock_rec *locks;
88 if ((data.dsize % sizeof(struct g_lock_rec)) != 0) {
89 DEBUG(1, ("invalid lock record length %d\n", (int)data.dsize));
90 return false;
93 num_locks = data.dsize / sizeof(struct g_lock_rec);
94 locks = talloc_array(mem_ctx, struct g_lock_rec, num_locks);
95 if (locks == NULL) {
96 DEBUG(1, ("talloc failed\n"));
97 return false;
100 memcpy(locks, data.dptr, data.dsize);
102 DEBUG(10, ("locks:\n"));
103 for (i=0; i<num_locks; i++) {
104 DEBUGADD(10, ("%s: %s %s\n",
105 procid_str(talloc_tos(), &locks[i].pid),
106 ((locks[i].lock_type & 1) == G_LOCK_READ) ?
107 "read" : "write",
108 (locks[i].lock_type & G_LOCK_PENDING) ?
109 "(pending)" : "(owner)"));
111 if (process_exists(locks[i].pid)) {
112 continue;
114 DEBUGADD(10, ("%s does not exist -- discarding\n",
115 procid_str(talloc_tos(), &locks[i].pid)));
117 if (i < (num_locks-1)) {
118 locks[i] = locks[num_locks-1];
120 num_locks -= 1;
123 *plocks = locks;
124 *pnum_locks = num_locks;
125 return true;
128 static struct g_lock_rec *g_lock_addrec(TALLOC_CTX *mem_ctx,
129 struct g_lock_rec *locks,
130 int num_locks,
131 const struct server_id pid,
132 enum g_lock_type lock_type)
134 struct g_lock_rec *result;
136 result = talloc_realloc(mem_ctx, locks, struct g_lock_rec,
137 num_locks+1);
138 if (result == NULL) {
139 return NULL;
142 result[num_locks].pid = pid;
143 result[num_locks].lock_type = lock_type;
144 return result;
147 static void g_lock_got_retry(struct messaging_context *msg,
148 void *private_data,
149 uint32_t msg_type,
150 struct server_id server_id,
151 DATA_BLOB *data);
/*
 * Forward declaration: timer handler that g_lock_lock() installs to
 * notice that the lock timeout has expired.
 */
static void g_lock_timedout(struct tevent_context *ev,
			    struct tevent_timer *te,
			    struct timeval current_time,
			    void *private_data);
157 static NTSTATUS g_lock_trylock(struct g_lock_ctx *ctx, const char *name,
158 enum g_lock_type lock_type)
160 struct db_record *rec = NULL;
161 struct g_lock_rec *locks = NULL;
162 int i, num_locks;
163 struct server_id self;
164 int our_index;
165 TDB_DATA data;
166 NTSTATUS status = NT_STATUS_OK;
167 NTSTATUS store_status;
169 again:
170 rec = ctx->db->fetch_locked(ctx->db, talloc_tos(),
171 string_term_tdb_data(name));
172 if (rec == NULL) {
173 DEBUG(10, ("fetch_locked(\"%s\") failed\n", name));
174 status = NT_STATUS_LOCK_NOT_GRANTED;
175 goto done;
178 if (!g_lock_parse(talloc_tos(), rec->value, &num_locks, &locks)) {
179 DEBUG(10, ("g_lock_parse for %s failed\n", name));
180 status = NT_STATUS_INTERNAL_ERROR;
181 goto done;
184 self = procid_self();
185 our_index = -1;
187 for (i=0; i<num_locks; i++) {
188 if (procid_equal(&self, &locks[i].pid)) {
189 if (our_index != -1) {
190 DEBUG(1, ("g_lock_trylock: Added ourself "
191 "twice!\n"));
192 status = NT_STATUS_INTERNAL_ERROR;
193 goto done;
195 if ((locks[i].lock_type & G_LOCK_PENDING) == 0) {
196 DEBUG(1, ("g_lock_trylock: Found ourself not "
197 "pending!\n"));
198 status = NT_STATUS_INTERNAL_ERROR;
199 goto done;
202 our_index = i;
204 /* never conflict with ourself */
205 continue;
207 if (g_lock_conflicts(lock_type, &locks[i])) {
208 struct server_id pid = locks[i].pid;
210 if (!process_exists(pid)) {
211 TALLOC_FREE(locks);
212 TALLOC_FREE(rec);
213 status = g_lock_force_unlock(ctx, name, pid);
214 if (!NT_STATUS_IS_OK(status)) {
215 DEBUG(1, ("Could not unlock dead lock "
216 "holder!\n"));
217 goto done;
219 goto again;
221 lock_type |= G_LOCK_PENDING;
225 if (our_index == -1) {
226 /* First round, add ourself */
228 locks = g_lock_addrec(talloc_tos(), locks, num_locks,
229 self, lock_type);
230 if (locks == NULL) {
231 DEBUG(10, ("g_lock_addrec failed\n"));
232 status = NT_STATUS_NO_MEMORY;
233 goto done;
235 } else {
237 * Retry. We were pending last time. Overwrite the
238 * stored lock_type with what we calculated, we might
239 * have acquired the lock this time.
241 locks[our_index].lock_type = lock_type;
244 data = make_tdb_data((uint8_t *)locks, talloc_get_size(locks));
245 store_status = rec->store(rec, data, 0);
246 if (!NT_STATUS_IS_OK(store_status)) {
247 DEBUG(1, ("rec->store failed: %s\n",
248 nt_errstr(store_status)));
249 status = store_status;
252 done:
253 TALLOC_FREE(locks);
254 TALLOC_FREE(rec);
256 if (NT_STATUS_IS_OK(status) && (lock_type & G_LOCK_PENDING) != 0) {
257 return STATUS_PENDING;
260 return NT_STATUS_OK;
263 NTSTATUS g_lock_lock(struct g_lock_ctx *ctx, const char *name,
264 enum g_lock_type lock_type, struct timeval timeout)
266 struct tevent_timer *te = NULL;
267 NTSTATUS status;
268 bool retry = false;
269 bool timedout = false;
271 DEBUG(10, ("Trying to acquire lock %d for %s\n", (int)lock_type,
272 name));
274 if (lock_type & ~1) {
275 DEBUG(1, ("Got invalid lock type %d for %s\n",
276 (int)lock_type, name));
277 return NT_STATUS_INVALID_PARAMETER;
280 #ifdef CLUSTER_SUPPORT
281 if (lp_clustering()) {
282 status = ctdb_watch_us(messaging_ctdbd_connection());
283 if (!NT_STATUS_IS_OK(status)) {
284 DEBUG(10, ("could not register retry with ctdb: %s\n",
285 nt_errstr(status)));
286 goto done;
289 #endif
291 status = messaging_register(ctx->msg, &retry, MSG_DBWRAP_G_LOCK_RETRY,
292 g_lock_got_retry);
293 if (!NT_STATUS_IS_OK(status)) {
294 DEBUG(10, ("messaging_register failed: %s\n",
295 nt_errstr(status)));
296 return status;
298 again:
299 retry = false;
301 status = g_lock_trylock(ctx, name, lock_type);
302 if (NT_STATUS_IS_OK(status)) {
303 DEBUG(10, ("Got lock %s\n", name));
304 goto done;
306 if (!NT_STATUS_EQUAL(status, STATUS_PENDING)) {
307 DEBUG(10, ("g_lock_trylock failed: %s\n",
308 nt_errstr(status)));
309 goto done;
312 if (retry) {
313 goto again;
316 DEBUG(10, ("g_lock_trylock: Did not get lock, waiting...\n"));
318 if (te == NULL) {
319 te = tevent_add_timer(
320 ctx->msg->event_ctx, talloc_tos(),
321 timeval_current_ofs(timeout.tv_sec, timeout.tv_usec),
322 g_lock_timedout, &timedout);
323 if (te == NULL) {
324 DEBUG(10, ("tevent_add_timer failed\n"));
325 status = NT_STATUS_NO_MEMORY;
326 goto done;
330 while (true) {
331 if (tevent_loop_once(ctx->msg->event_ctx) == -1) {
332 DEBUG(1, ("tevent_loop_once failed\n"));
333 status = NT_STATUS_INTERNAL_ERROR;
334 goto done;
336 if (retry) {
337 goto again;
339 if (timedout) {
340 DEBUG(10, ("g_lock_lock timed out\n"));
342 te = NULL;
344 status = NT_STATUS_LOCK_NOT_GRANTED;
345 goto done;
348 done:
350 if (!NT_STATUS_IS_OK(status)) {
351 NTSTATUS unlock_status;
353 unlock_status = g_lock_unlock(ctx, name);
355 if (!NT_STATUS_IS_OK(unlock_status)) {
356 DEBUG(1, ("Could not remove ourself from the locking "
357 "db: %s\n", nt_errstr(status)));
361 messaging_deregister(ctx->msg, MSG_DBWRAP_G_LOCK_RETRY, &retry);
362 TALLOC_FREE(te);
364 return status;
367 static void g_lock_got_retry(struct messaging_context *msg,
368 void *private_data,
369 uint32_t msg_type,
370 struct server_id server_id,
371 DATA_BLOB *data)
373 bool *pretry = (bool *)private_data;
375 DEBUG(10, ("Got retry message from pid %s\n",
376 procid_str(talloc_tos(), &server_id)));
378 *pretry = true;
/*
 * Timer handler for the lock timeout.
 *
 * private_data points to a bool which is set to true; the timer event
 * itself is released afterwards.
 */
static void g_lock_timedout(struct tevent_context *ev,
			    struct tevent_timer *te,
			    struct timeval current_time,
			    void *private_data)
{
	bool *expired = (bool *)private_data;

	*expired = true;

	TALLOC_FREE(te);
}
391 static NTSTATUS g_lock_force_unlock(struct g_lock_ctx *ctx, const char *name,
392 struct server_id pid)
394 struct db_record *rec = NULL;
395 struct g_lock_rec *locks = NULL;
396 int i, num_locks;
397 enum g_lock_type lock_type;
398 NTSTATUS status;
400 rec = ctx->db->fetch_locked(ctx->db, talloc_tos(),
401 string_term_tdb_data(name));
402 if (rec == NULL) {
403 DEBUG(10, ("fetch_locked(\"%s\") failed\n", name));
404 status = NT_STATUS_INTERNAL_ERROR;
405 goto done;
408 if (!g_lock_parse(talloc_tos(), rec->value, &num_locks, &locks)) {
409 DEBUG(10, ("g_lock_parse for %s failed\n", name));
410 status = NT_STATUS_INTERNAL_ERROR;
411 goto done;
414 for (i=0; i<num_locks; i++) {
415 if (procid_equal(&pid, &locks[i].pid)) {
416 break;
420 if (i == num_locks) {
421 DEBUG(10, ("g_lock_force_unlock: Lock not found\n"));
422 status = NT_STATUS_INTERNAL_ERROR;
423 goto done;
426 lock_type = locks[i].lock_type;
428 if (i < (num_locks-1)) {
429 locks[i] = locks[num_locks-1];
431 num_locks -= 1;
433 if (num_locks == 0) {
434 status = rec->delete_rec(rec);
435 } else {
436 TDB_DATA data;
437 data = make_tdb_data((uint8_t *)locks,
438 sizeof(struct g_lock_rec) * num_locks);
439 status = rec->store(rec, data, 0);
442 if (!NT_STATUS_IS_OK(status)) {
443 DEBUG(1, ("g_lock_force_unlock: Could not store record: %s\n",
444 nt_errstr(status)));
445 goto done;
448 if ((lock_type & G_LOCK_PENDING) == 0) {
450 * We've been the lock holder. Tell all others to retry.
452 for (i=0; i<num_locks; i++) {
453 if ((locks[i].lock_type & G_LOCK_PENDING) == 0) {
454 continue;
458 * Ping all waiters to retry
460 status = messaging_send(ctx->msg, locks[i].pid,
461 MSG_DBWRAP_G_LOCK_RETRY,
462 &data_blob_null);
463 if (!NT_STATUS_IS_OK(status)) {
464 DEBUG(1, ("sending retry to %s failed: %s\n",
465 procid_str(talloc_tos(),
466 &locks[i].pid),
467 nt_errstr(status)));
471 done:
473 TALLOC_FREE(locks);
474 TALLOC_FREE(rec);
475 return status;
478 NTSTATUS g_lock_unlock(struct g_lock_ctx *ctx, const char *name)
480 NTSTATUS status;
482 status = g_lock_force_unlock(ctx, name, procid_self());
484 #ifdef CLUSTER_SUPPORT
485 if (lp_clustering()) {
486 ctdb_unwatch(messaging_ctdbd_connection());
488 #endif
489 return status;
/*
 * Traverse state for g_lock_locks(): the caller's callback together
 * with the opaque pointer that is handed back to it.
 */
struct g_lock_locks_state {
	/* called with the name of each lock */
	int (*fn)(const char *name, void *private_data);
	/* passed through to fn unchanged */
	void *private_data;
};
497 static int g_lock_locks_fn(struct db_record *rec, void *priv)
499 struct g_lock_locks_state *state = (struct g_lock_locks_state *)priv;
501 if ((rec->key.dsize == 0) || (rec->key.dptr[rec->key.dsize-1] != 0)) {
502 DEBUG(1, ("invalid key in g_lock.tdb, ignoring\n"));
503 return 0;
505 return state->fn((char *)rec->key.dptr, state->private_data);
508 int g_lock_locks(struct g_lock_ctx *ctx,
509 int (*fn)(const char *name, void *private_data),
510 void *private_data)
512 struct g_lock_locks_state state;
514 state.fn = fn;
515 state.private_data = private_data;
517 return ctx->db->traverse_read(ctx->db, g_lock_locks_fn, &state);
520 NTSTATUS g_lock_dump(struct g_lock_ctx *ctx, const char *name,
521 int (*fn)(struct server_id pid,
522 enum g_lock_type lock_type,
523 void *private_data),
524 void *private_data)
526 TDB_DATA data;
527 int i, num_locks;
528 struct g_lock_rec *locks = NULL;
529 bool ret;
531 if (ctx->db->fetch(ctx->db, talloc_tos(), string_term_tdb_data(name),
532 &data) != 0) {
533 return NT_STATUS_NOT_FOUND;
536 if ((data.dsize == 0) || (data.dptr == NULL)) {
537 return NT_STATUS_OK;
540 ret = g_lock_parse(talloc_tos(), data, &num_locks, &locks);
542 TALLOC_FREE(data.dptr);
544 if (!ret) {
545 DEBUG(10, ("g_lock_parse for %s failed\n", name));
546 return NT_STATUS_INTERNAL_ERROR;
549 for (i=0; i<num_locks; i++) {
550 if (fn(locks[i].pid, locks[i].lock_type, private_data) != 0) {
551 break;
554 TALLOC_FREE(locks);
555 return NT_STATUS_OK;
/*
 * State shared between g_lock_get() and its callback g_lock_get_fn().
 */
struct g_lock_get_state {
	/* set to true once a non-pending entry has been seen */
	bool found;
	/* where the pid of that entry is written to */
	struct server_id *pid;
};
563 static int g_lock_get_fn(struct server_id pid, enum g_lock_type lock_type,
564 void *priv)
566 struct g_lock_get_state *state = (struct g_lock_get_state *)priv;
568 if ((lock_type & G_LOCK_PENDING) != 0) {
569 return 0;
572 state->found = true;
573 *state->pid = pid;
574 return 1;
577 NTSTATUS g_lock_get(struct g_lock_ctx *ctx, const char *name,
578 struct server_id *pid)
580 struct g_lock_get_state state;
581 NTSTATUS status;
583 state.found = false;
584 state.pid = pid;
586 status = g_lock_dump(ctx, name, g_lock_get_fn, &state);
587 if (!NT_STATUS_IS_OK(status)) {
588 return status;
590 if (!state.found) {
591 return NT_STATUS_NOT_FOUND;
593 return NT_STATUS_OK;