/*
   Unix SMB/CIFS implementation.
   global locks based on dbwrap and messaging
   Copyright (C) 2009 by Volker Lendecke

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/
#include "includes.h"
#include "g_lock.h"
#include "librpc/gen_ndr/messaging.h"
static NTSTATUS g_lock_force_unlock(struct g_lock_ctx *ctx, const char *name,
				    struct server_id pid);
struct g_lock_ctx {
	struct db_context *db;
	struct messaging_context *msg;
};
/*
 * The "g_lock.tdb" file contains records, indexed by the 0-terminated
 * lockname. The record contains an array of "struct g_lock_rec"
 * structures. Waiters have the lock_type with G_LOCK_PENDING or'ed.
 */
struct g_lock_rec {
	enum g_lock_type lock_type;
	struct server_id pid;
};
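
/*
 * Illustration only (not a format definition): a record for a name that is
 * write-locked by pid 1234 while pids 5678 and 9012 are waiting would hold
 * three g_lock_rec entries:
 *
 *   { G_LOCK_WRITE,                  1234 }   <- owner
 *   { G_LOCK_WRITE | G_LOCK_PENDING, 5678 }   <- waiter
 *   { G_LOCK_WRITE | G_LOCK_PENDING, 9012 }   <- waiter
 */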
struct g_lock_ctx *g_lock_ctx_init(TALLOC_CTX *mem_ctx,
				   struct messaging_context *msg)
{
	struct g_lock_ctx *result;

	result = talloc(mem_ctx, struct g_lock_ctx);
	if (result == NULL) {
		return NULL;
	}
	result->msg = msg;

	result->db = db_open(result, lock_path("g_lock.tdb"), 0,
			     TDB_CLEAR_IF_FIRST, O_RDWR|O_CREAT, 0700);
	if (result->db == NULL) {
		DEBUG(1, ("g_lock_init: Could not open g_lock.tdb\n"));
		TALLOC_FREE(result);
		return NULL;
	}
	return result;
}
static bool g_lock_conflicts(enum g_lock_type lock_type,
			     const struct g_lock_rec *rec)
{
	enum g_lock_type rec_lock = rec->lock_type;

	if ((rec_lock & G_LOCK_PENDING) != 0) {
		return false;
	}

	/*
	 * Only tested write locks so far. Very likely this routine
	 * needs to be fixed for read locks....
	 */
	if ((lock_type == G_LOCK_READ) && (rec_lock == G_LOCK_READ)) {
		return false;
	}
	return true;
}
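
/*
 * Parse a g_lock.tdb record into a talloc'ed array of g_lock_rec entries.
 * Non-pending entries whose owning process no longer exists are dropped
 * from the returned array. Returns false on a malformed record or on
 * allocation failure.
 */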
static bool g_lock_parse(TALLOC_CTX *mem_ctx, TDB_DATA data,
			 int *pnum_locks, struct g_lock_rec **plocks)
{
	int i, num_locks;
	struct g_lock_rec *locks;

	if ((data.dsize % sizeof(struct g_lock_rec)) != 0) {
		DEBUG(1, ("invalid lock record length %d\n", (int)data.dsize));
		return false;
	}

	num_locks = data.dsize / sizeof(struct g_lock_rec);
	locks = talloc_array(mem_ctx, struct g_lock_rec, num_locks);
	if (locks == NULL) {
		DEBUG(1, ("talloc failed\n"));
		return false;
	}

	memcpy(locks, data.dptr, data.dsize);

	DEBUG(10, ("locks:\n"));
	for (i=0; i<num_locks; i++) {
		DEBUGADD(10, ("%s: %s %s\n",
			      procid_str(talloc_tos(), &locks[i].pid),
			      ((locks[i].lock_type & 1) == G_LOCK_READ) ?
			      "read" : "write",
			      (locks[i].lock_type & G_LOCK_PENDING) ?
			      "(pending)" : "(owner)"));

		if (((locks[i].lock_type & G_LOCK_PENDING) == 0)
		    && !process_exists(locks[i].pid)) {

			DEBUGADD(10, ("lock owner %s died -- discarding\n",
				      procid_str(talloc_tos(),
						 &locks[i].pid)));

			if (i < (num_locks-1)) {
				locks[i] = locks[num_locks-1];
			}
			num_locks -= 1;
		}
	}

	*plocks = locks;
	*pnum_locks = num_locks;
	return true;
}
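
/*
 * Remove entries belonging to processes that no longer exist from an
 * in-memory lock array. Called once we have become the lock owner, right
 * before the record is stored back.
 */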
static void g_lock_cleanup(int *pnum_locks, struct g_lock_rec *locks)
{
	int i, num_locks;

	num_locks = *pnum_locks;

	DEBUG(10, ("g_lock_cleanup: %d locks\n", num_locks));

	for (i=0; i<num_locks; i++) {
		if (process_exists(locks[i].pid)) {
			continue;
		}
		DEBUGADD(10, ("%s does not exist -- discarding\n",
			      procid_str(talloc_tos(), &locks[i].pid)));

		if (i < (num_locks-1)) {
			locks[i] = locks[num_locks-1];
		}
		num_locks -= 1;
	}
	*pnum_locks = num_locks;
	return;
}
static struct g_lock_rec *g_lock_addrec(TALLOC_CTX *mem_ctx,
					struct g_lock_rec *locks,
					int *pnum_locks,
					const struct server_id pid,
					enum g_lock_type lock_type)
{
	struct g_lock_rec *result;
	int num_locks = *pnum_locks;

	result = talloc_realloc(mem_ctx, locks, struct g_lock_rec,
				num_locks+1);
	if (result == NULL) {
		return NULL;
	}

	result[num_locks].pid = pid;
	result[num_locks].lock_type = lock_type;
	*pnum_locks += 1;
	return result;
}
static void g_lock_got_retry(struct messaging_context *msg,
			     void *private_data,
			     uint32_t msg_type,
			     struct server_id server_id,
			     DATA_BLOB *data);
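
/*
 * Make one attempt at taking the lock: fetch and lock the record, force
 * dead conflicting holders out, and either add ourselves (pending if there
 * is a conflict) or update our existing pending entry. Returns NT_STATUS_OK
 * when the lock was acquired and STATUS_PENDING when we have been queued
 * as a waiter.
 */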
static NTSTATUS g_lock_trylock(struct g_lock_ctx *ctx, const char *name,
			       enum g_lock_type lock_type)
{
	struct db_record *rec = NULL;
	struct g_lock_rec *locks = NULL;
	int i, our_index, num_locks;
	struct server_id self;
	TDB_DATA data;
	NTSTATUS status = NT_STATUS_OK;
	NTSTATUS store_status;

again:
	rec = ctx->db->fetch_locked(ctx->db, talloc_tos(),
				    string_term_tdb_data(name));
	if (rec == NULL) {
		DEBUG(10, ("fetch_locked(\"%s\") failed\n", name));
		status = NT_STATUS_LOCK_NOT_GRANTED;
		goto done;
	}

	if (!g_lock_parse(talloc_tos(), rec->value, &num_locks, &locks)) {
		DEBUG(10, ("g_lock_parse for %s failed\n", name));
		status = NT_STATUS_INTERNAL_ERROR;
		goto done;
	}

	self = procid_self();
	our_index = -1;

	for (i=0; i<num_locks; i++) {
		if (procid_equal(&self, &locks[i].pid)) {
			if (our_index != -1) {
				DEBUG(1, ("g_lock_trylock: Added ourself "
					  "twice!\n"));
				status = NT_STATUS_INTERNAL_ERROR;
				goto done;
			}
			if ((locks[i].lock_type & G_LOCK_PENDING) == 0) {
				DEBUG(1, ("g_lock_trylock: Found ourself not "
					  "pending!\n"));
				status = NT_STATUS_INTERNAL_ERROR;
				goto done;
			}

			our_index = i;

			/* never conflict with ourself */
			continue;
		}
		if (g_lock_conflicts(lock_type, &locks[i])) {
			struct server_id pid = locks[i].pid;

			if (!process_exists(pid)) {
				TALLOC_FREE(locks);
				TALLOC_FREE(rec);
				status = g_lock_force_unlock(ctx, name, pid);
				if (!NT_STATUS_IS_OK(status)) {
					DEBUG(1, ("Could not unlock dead lock "
						  "holder!\n"));
					goto done;
				}
				goto again;
			}
			lock_type |= G_LOCK_PENDING;
		}
	}

	if (our_index == -1) {
		/* First round, add ourself */

		locks = g_lock_addrec(talloc_tos(), locks, &num_locks,
				      self, lock_type);
		if (locks == NULL) {
			DEBUG(10, ("g_lock_addrec failed\n"));
			status = NT_STATUS_NO_MEMORY;
			goto done;
		}
	} else {
		/*
		 * Retry. We were pending last time. Overwrite the
		 * stored lock_type with what we calculated, we might
		 * have acquired the lock this time.
		 */
		locks[our_index].lock_type = lock_type;
	}

	if (NT_STATUS_IS_OK(status) && ((lock_type & G_LOCK_PENDING) == 0)) {
		/*
		 * Walk through the list of locks, search for dead entries
		 */
		g_lock_cleanup(&num_locks, locks);
	}

	data = make_tdb_data((uint8_t *)locks, num_locks * sizeof(*locks));
	store_status = rec->store(rec, data, 0);
	if (!NT_STATUS_IS_OK(store_status)) {
		DEBUG(1, ("rec->store failed: %s\n",
			  nt_errstr(store_status)));
		status = store_status;
	}

done:
	TALLOC_FREE(locks);
	TALLOC_FREE(rec);

	if (NT_STATUS_IS_OK(status) && (lock_type & G_LOCK_PENDING) != 0) {
		return STATUS_PENDING;
	}

	return status;
}
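
/*
 * Grab a global lock, blocking for at most "timeout". While someone else
 * holds the lock we wait for MSG_DBWRAP_G_LOCK_RETRY messages (and, with
 * clustering, for traffic on the ctdbd connection) and additionally poll
 * at least once a minute.
 */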
NTSTATUS g_lock_lock(struct g_lock_ctx *ctx, const char *name,
		     enum g_lock_type lock_type, struct timeval timeout)
{
	struct tevent_timer *te = NULL;
	NTSTATUS status;
	bool retry = false;
	struct timeval timeout_end;
	struct timeval time_now;

	DEBUG(10, ("Trying to acquire lock %d for %s\n", (int)lock_type,
		   name));

	if (lock_type & ~1) {
		DEBUG(1, ("Got invalid lock type %d for %s\n",
			  (int)lock_type, name));
		return NT_STATUS_INVALID_PARAMETER;
	}

#ifdef CLUSTER_SUPPORT
	if (lp_clustering()) {
		status = ctdb_watch_us(messaging_ctdbd_connection());
		if (!NT_STATUS_IS_OK(status)) {
			DEBUG(10, ("could not register retry with ctdb: %s\n",
				   nt_errstr(status)));
			goto done;
		}
	}
#endif

	status = messaging_register(ctx->msg, &retry, MSG_DBWRAP_G_LOCK_RETRY,
				    g_lock_got_retry);
	if (!NT_STATUS_IS_OK(status)) {
		DEBUG(10, ("messaging_register failed: %s\n",
			   nt_errstr(status)));
		return status;
	}

	time_now = timeval_current();
	timeout_end = timeval_sum(&time_now, &timeout);

	while (true) {
#ifdef CLUSTER_SUPPORT
		fd_set _r_fds;
#endif
		fd_set *r_fds = NULL;
		int max_fd = 0;
		int ret;
		struct timeval timeout_remaining, select_timeout;

		status = g_lock_trylock(ctx, name, lock_type);
		if (NT_STATUS_IS_OK(status)) {
			DEBUG(10, ("Got lock %s\n", name));
			break;
		}
		if (!NT_STATUS_EQUAL(status, STATUS_PENDING)) {
			DEBUG(10, ("g_lock_trylock failed: %s\n",
				   nt_errstr(status)));
			break;
		}

		DEBUG(10, ("g_lock_trylock: Did not get lock, waiting...\n"));

		/* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
		 * !!! HACK ALERT --- FIX ME !!!
		 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
		 * What we really want to do here is to react to
		 * MSG_DBWRAP_G_LOCK_RETRY messages that are either sent
		 * by a client doing g_lock_unlock or by ourselves when
		 * we receive a CTDB_SRVID_SAMBA_NOTIFY or
		 * CTDB_SRVID_RECONFIGURE message from ctdbd, i.e. when
		 * either a client holding a lock or a complete node
		 * has died.
		 *
		 * Doing this properly involves calling tevent_loop_once(),
		 * but doing this here with the main ctdbd messaging context
		 * creates a nested event loop when g_lock_lock() is called
		 * from the main event loop, e.g. in a tcon_and_X where the
		 * share_info.tdb needs to be initialized and is locked by
		 * another process, or when the remote registry is accessed
		 * for writing and some other process already holds a lock
		 * on the registry.tdb.
		 *
		 * So as a quick fix, we act a little coarsely here: we do
		 * a select on the ctdb connection fd and when it is readable
		 * or we get EINTR, then we retry without actually parsing
		 * any ctdb packets or dispatching messages. This means that
		 * we retry more often than intended by design, but this does
		 * not harm and it is unobtrusive. When we have finished,
		 * the main loop will pick up all the messages and ctdb
		 * packets. The only extra twist is that we cannot use timed
		 * events here but have to handcode a timeout.
		 */

#ifdef CLUSTER_SUPPORT
		if (lp_clustering()) {
			struct ctdbd_connection *conn =
				messaging_ctdbd_connection();

			r_fds = &_r_fds;
			FD_ZERO(r_fds);
			max_fd = ctdbd_conn_get_fd(conn);
			FD_SET(max_fd, r_fds);
		}
#endif

		time_now = timeval_current();
		timeout_remaining = timeval_until(&time_now, &timeout_end);
		select_timeout = timeval_set(60, 0);

		select_timeout = timeval_min(&select_timeout,
					     &timeout_remaining);

		ret = sys_select(max_fd + 1, r_fds, NULL, NULL,
				 &select_timeout);
		if (ret == -1) {
			if (errno != EINTR) {
				DEBUG(1, ("error calling select: %s\n",
					  strerror(errno)));
				status = NT_STATUS_INTERNAL_ERROR;
				break;
			}
			/*
			 * errno == EINTR:
			 * This means a signal was received.
			 * It might have been a MSG_DBWRAP_G_LOCK_RETRY
			 * message. Retry.
			 */
		} else if (ret == 0) {
			if (timeval_expired(&timeout_end)) {
				DEBUG(10, ("g_lock_lock timed out\n"));
				status = NT_STATUS_LOCK_NOT_GRANTED;
				break;
			}
			DEBUG(10, ("select returned 0 but timeout not "
				   "expired, retrying\n"));
		} else if (ret != 1) {
			DEBUG(1, ("invalid return code of select: %d\n", ret));
			status = NT_STATUS_INTERNAL_ERROR;
			break;
		}
		/*
		 * ret == 1:
		 * This means ctdbd has sent us some data.
		 * Might be a CTDB_SRVID_RECONFIGURE or a
		 * CTDB_SRVID_SAMBA_NOTIFY message.
		 * Retry.
		 */
	}

#ifdef CLUSTER_SUPPORT
done:
#endif

	if (!NT_STATUS_IS_OK(status)) {
		NTSTATUS unlock_status;

		unlock_status = g_lock_unlock(ctx, name);

		if (!NT_STATUS_IS_OK(unlock_status)) {
			DEBUG(1, ("Could not remove ourself from the locking "
				  "db: %s\n", nt_errstr(unlock_status)));
		}
	}

	messaging_deregister(ctx->msg, MSG_DBWRAP_G_LOCK_RETRY, &retry);
	TALLOC_FREE(te);

	return status;
}
static void g_lock_got_retry(struct messaging_context *msg,
			     void *private_data,
			     uint32_t msg_type,
			     struct server_id server_id,
			     DATA_BLOB *data)
{
	bool *pretry = (bool *)private_data;

	DEBUG(10, ("Got retry message from pid %s\n",
		   procid_str(talloc_tos(), &server_id)));

	*pretry = true;
}
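
/*
 * Remove the lock record of "pid" on "name". If the removed entry was the
 * lock owner (i.e. not pending), wake up a handful of waiters via
 * MSG_DBWRAP_G_LOCK_RETRY so that one of them can take over.
 */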
static NTSTATUS g_lock_force_unlock(struct g_lock_ctx *ctx, const char *name,
				    struct server_id pid)
{
	struct db_record *rec = NULL;
	struct g_lock_rec *locks = NULL;
	int i, num_locks;
	enum g_lock_type lock_type;
	NTSTATUS status;

	rec = ctx->db->fetch_locked(ctx->db, talloc_tos(),
				    string_term_tdb_data(name));
	if (rec == NULL) {
		DEBUG(10, ("fetch_locked(\"%s\") failed\n", name));
		status = NT_STATUS_INTERNAL_ERROR;
		goto done;
	}

	if (!g_lock_parse(talloc_tos(), rec->value, &num_locks, &locks)) {
		DEBUG(10, ("g_lock_parse for %s failed\n", name));
		status = NT_STATUS_INTERNAL_ERROR;
		goto done;
	}

	for (i=0; i<num_locks; i++) {
		if (procid_equal(&pid, &locks[i].pid)) {
			break;
		}
	}

	if (i == num_locks) {
		DEBUG(10, ("g_lock_force_unlock: Lock not found\n"));
		status = NT_STATUS_INTERNAL_ERROR;
		goto done;
	}

	lock_type = locks[i].lock_type;

	if (i < (num_locks-1)) {
		locks[i] = locks[num_locks-1];
	}
	num_locks -= 1;

	if (num_locks == 0) {
		status = rec->delete_rec(rec);
	} else {
		TDB_DATA data;
		data = make_tdb_data((uint8_t *)locks,
				     sizeof(struct g_lock_rec) * num_locks);
		status = rec->store(rec, data, 0);
	}

	if (!NT_STATUS_IS_OK(status)) {
		DEBUG(1, ("g_lock_force_unlock: Could not store record: %s\n",
			  nt_errstr(status)));
		goto done;
	}

	TALLOC_FREE(rec);

	if ((lock_type & G_LOCK_PENDING) == 0) {
		int num_wakeups = 0;

		/*
		 * We've been the lock holder. Tell others to retry, but
		 * don't tell all of them to avoid a thundering herd. In
		 * case this leads to a complete stall because we miss some
		 * processes, the loop in g_lock_lock tries at least
		 * once a minute.
		 */

		for (i=0; i<num_locks; i++) {
			if ((locks[i].lock_type & G_LOCK_PENDING) == 0) {
				continue;
			}
			if (!process_exists(locks[i].pid)) {
				continue;
			}

			/*
			 * Ping all waiters to retry
			 */
			status = messaging_send(ctx->msg, locks[i].pid,
						MSG_DBWRAP_G_LOCK_RETRY,
						&data_blob_null);
			if (!NT_STATUS_IS_OK(status)) {
				DEBUG(1, ("sending retry to %s failed: %s\n",
					  procid_str(talloc_tos(),
						     &locks[i].pid),
					  nt_errstr(status)));
			} else {
				num_wakeups += 1;
			}
			if (num_wakeups > 5) {
				break;
			}
		}
	}
done:
	/*
	 * For the error path, TALLOC_FREE(rec) as well. In the good
	 * path we have already freed it.
	 */
	TALLOC_FREE(rec);

	TALLOC_FREE(locks);
	return status;
}
NTSTATUS g_lock_unlock(struct g_lock_ctx *ctx, const char *name)
{
	NTSTATUS status;

	status = g_lock_force_unlock(ctx, name, procid_self());

#ifdef CLUSTER_SUPPORT
	if (lp_clustering()) {
		ctdb_unwatch(messaging_ctdbd_connection());
	}
#endif
	return status;
}
struct g_lock_locks_state {
	int (*fn)(const char *name, void *private_data);
	void *private_data;
};
static int g_lock_locks_fn(struct db_record *rec, void *priv)
{
	struct g_lock_locks_state *state = (struct g_lock_locks_state *)priv;

	if ((rec->key.dsize == 0) || (rec->key.dptr[rec->key.dsize-1] != 0)) {
		DEBUG(1, ("invalid key in g_lock.tdb, ignoring\n"));
		return 0;
	}
	return state->fn((char *)rec->key.dptr, state->private_data);
}
int g_lock_locks(struct g_lock_ctx *ctx,
		 int (*fn)(const char *name, void *private_data),
		 void *private_data)
{
	struct g_lock_locks_state state;

	state.fn = fn;
	state.private_data = private_data;

	return ctx->db->traverse_read(ctx->db, g_lock_locks_fn, &state);
}
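
/*
 * Call "fn" for every entry (owner and waiters) stored under "name".
 * Iteration stops early when "fn" returns non-zero.
 */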
NTSTATUS g_lock_dump(struct g_lock_ctx *ctx, const char *name,
		     int (*fn)(struct server_id pid,
			       enum g_lock_type lock_type,
			       void *private_data),
		     void *private_data)
{
	TDB_DATA data;
	int i, num_locks;
	struct g_lock_rec *locks = NULL;
	bool ret;

	if (ctx->db->fetch(ctx->db, talloc_tos(), string_term_tdb_data(name),
			   &data) != 0) {
		return NT_STATUS_NOT_FOUND;
	}

	if ((data.dsize == 0) || (data.dptr == NULL)) {
		return NT_STATUS_OK;
	}

	ret = g_lock_parse(talloc_tos(), data, &num_locks, &locks);

	TALLOC_FREE(data.dptr);

	if (!ret) {
		DEBUG(10, ("g_lock_parse for %s failed\n", name));
		return NT_STATUS_INTERNAL_ERROR;
	}

	for (i=0; i<num_locks; i++) {
		if (fn(locks[i].pid, locks[i].lock_type, private_data) != 0) {
			break;
		}
	}
	TALLOC_FREE(locks);
	return NT_STATUS_OK;
}
struct g_lock_get_state {
	bool found;
	struct server_id *pid;
};
static int g_lock_get_fn(struct server_id pid, enum g_lock_type lock_type,
			 void *priv)
{
	struct g_lock_get_state *state = (struct g_lock_get_state *)priv;

	if ((lock_type & G_LOCK_PENDING) != 0) {
		return 0;
	}

	state->found = true;
	*state->pid = pid;
	return 1;
}
NTSTATUS g_lock_get(struct g_lock_ctx *ctx, const char *name,
		    struct server_id *pid)
{
	struct g_lock_get_state state;
	NTSTATUS status;

	state.found = false;
	state.pid = pid;

	status = g_lock_dump(ctx, name, g_lock_get_fn, &state);
	if (!NT_STATUS_IS_OK(status)) {
		return status;
	}
	if (!state.found) {
		return NT_STATUS_NOT_FOUND;
	}
	return NT_STATUS_OK;
}
static bool g_lock_init_all(TALLOC_CTX *mem_ctx,
			    struct tevent_context **pev,
			    struct messaging_context **pmsg,
			    struct g_lock_ctx **pg_ctx)
{
	struct tevent_context *ev = NULL;
	struct messaging_context *msg = NULL;
	struct g_lock_ctx *g_ctx = NULL;

	ev = tevent_context_init(mem_ctx);
	if (ev == NULL) {
		d_fprintf(stderr, "ERROR: could not init event context\n");
		goto fail;
	}
	msg = messaging_init(mem_ctx, procid_self(), ev);
	if (msg == NULL) {
		d_fprintf(stderr, "ERROR: could not init messaging context\n");
		goto fail;
	}
	g_ctx = g_lock_ctx_init(mem_ctx, msg);
	if (g_ctx == NULL) {
		d_fprintf(stderr, "ERROR: could not init g_lock context\n");
		goto fail;
	}

	*pev = ev;
	*pmsg = msg;
	*pg_ctx = g_ctx;
	return true;
fail:
	TALLOC_FREE(g_ctx);
	TALLOC_FREE(msg);
	TALLOC_FREE(ev);
	return false;
}
NTSTATUS g_lock_do(const char *name, enum g_lock_type lock_type,
		   struct timeval timeout,
		   void (*fn)(void *private_data), void *private_data)
{
	struct tevent_context *ev = NULL;
	struct messaging_context *msg = NULL;
	struct g_lock_ctx *g_ctx = NULL;
	NTSTATUS status;

	if (!g_lock_init_all(talloc_tos(), &ev, &msg, &g_ctx)) {
		status = NT_STATUS_ACCESS_DENIED;
		goto done;
	}

	status = g_lock_lock(g_ctx, name, lock_type, timeout);
	if (!NT_STATUS_IS_OK(status)) {
		goto done;
	}
	fn(private_data);
	g_lock_unlock(g_ctx, name);

done:
	TALLOC_FREE(g_ctx);
	TALLOC_FREE(msg);
	TALLOC_FREE(ev);
	return status;
}
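
/*
 * Usage sketch (illustration only; "print_greeting" and the lock name
 * "example_lock" are made-up examples, not part of this file): run a short
 * critical section under a global write lock with a 10 second timeout.
 *
 *	static void print_greeting(void *private_data)
 *	{
 *		d_printf("locked section for %s\n",
 *			 (const char *)private_data);
 *	}
 *
 *	status = g_lock_do("example_lock", G_LOCK_WRITE, timeval_set(10, 0),
 *			   print_greeting,
 *			   discard_const_p(void, "example_lock"));
 *	if (!NT_STATUS_IS_OK(status)) {
 *		DEBUG(1, ("g_lock_do failed: %s\n", nt_errstr(status)));
 *	}
 */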