2 Unix SMB/CIFS implementation.
3 global locks based on dbwrap and messaging
4 Copyright (C) 2009 by Volker Lendecke
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <http://www.gnu.org/licenses/>.
23 static NTSTATUS
g_lock_force_unlock(struct g_lock_ctx
*ctx
, const char *name
,
24 struct server_id pid
);
27 struct db_context
*db
;
28 struct messaging_context
*msg
;
32 * The "g_lock.tdb" file contains records, indexed by the 0-terminated
33 * lockname. The record contains an array of "struct g_lock_rec"
34 * structures. Waiters have the lock_type with G_LOCK_PENDING or'ed.
38 enum g_lock_type lock_type
;
42 struct g_lock_ctx
*g_lock_ctx_init(TALLOC_CTX
*mem_ctx
,
43 struct messaging_context
*msg
)
45 struct g_lock_ctx
*result
;
47 result
= talloc(mem_ctx
, struct g_lock_ctx
);
53 result
->db
= db_open(result
, lock_path("g_lock.tdb"), 0,
54 TDB_CLEAR_IF_FIRST
, O_RDWR
|O_CREAT
, 0700);
55 if (result
->db
== NULL
) {
56 DEBUG(1, ("g_lock_init: Could not open g_lock.tdb"));
63 static bool g_lock_conflicts(enum g_lock_type lock_type
,
64 const struct g_lock_rec
*rec
)
66 enum g_lock_type rec_lock
= rec
->lock_type
;
68 if ((rec_lock
& G_LOCK_PENDING
) != 0) {
73 * Only tested write locks so far. Very likely this routine
74 * needs to be fixed for read locks....
76 if ((lock_type
== G_LOCK_READ
) && (rec_lock
== G_LOCK_READ
)) {
82 static bool g_lock_parse(TALLOC_CTX
*mem_ctx
, TDB_DATA data
,
83 int *pnum_locks
, struct g_lock_rec
**plocks
)
86 struct g_lock_rec
*locks
;
88 if ((data
.dsize
% sizeof(struct g_lock_rec
)) != 0) {
89 DEBUG(1, ("invalid lock record length %d\n", (int)data
.dsize
));
93 num_locks
= data
.dsize
/ sizeof(struct g_lock_rec
);
94 locks
= talloc_array(mem_ctx
, struct g_lock_rec
, num_locks
);
96 DEBUG(1, ("talloc failed\n"));
100 memcpy(locks
, data
.dptr
, data
.dsize
);
102 DEBUG(10, ("locks:\n"));
103 for (i
=0; i
<num_locks
; i
++) {
104 DEBUGADD(10, ("%s: %s %s\n",
105 procid_str(talloc_tos(), &locks
[i
].pid
),
106 ((locks
[i
].lock_type
& 1) == G_LOCK_READ
) ?
108 (locks
[i
].lock_type
& G_LOCK_PENDING
) ?
109 "(pending)" : "(owner)"));
111 if (process_exists(locks
[i
].pid
)) {
114 DEBUGADD(10, ("%s does not exist -- discarding\n",
115 procid_str(talloc_tos(), &locks
[i
].pid
)));
117 if (i
< (num_locks
-1)) {
118 locks
[i
] = locks
[num_locks
-1];
124 *pnum_locks
= num_locks
;
128 static struct g_lock_rec
*g_lock_addrec(TALLOC_CTX
*mem_ctx
,
129 struct g_lock_rec
*locks
,
131 const struct server_id pid
,
132 enum g_lock_type lock_type
)
134 struct g_lock_rec
*result
;
136 result
= talloc_realloc(mem_ctx
, locks
, struct g_lock_rec
,
138 if (result
== NULL
) {
142 result
[num_locks
].pid
= pid
;
143 result
[num_locks
].lock_type
= lock_type
;
147 static void g_lock_got_retry(struct messaging_context
*msg
,
150 struct server_id server_id
,
152 static void g_lock_timedout(struct tevent_context
*ev
,
153 struct tevent_timer
*te
,
154 struct timeval current_time
,
157 static NTSTATUS
g_lock_trylock(struct g_lock_ctx
*ctx
, const char *name
,
158 enum g_lock_type lock_type
)
160 struct db_record
*rec
= NULL
;
161 struct g_lock_rec
*locks
= NULL
;
163 struct server_id self
;
166 NTSTATUS status
= NT_STATUS_OK
;
167 NTSTATUS store_status
;
170 rec
= ctx
->db
->fetch_locked(ctx
->db
, talloc_tos(),
171 string_term_tdb_data(name
));
173 DEBUG(10, ("fetch_locked(\"%s\") failed\n", name
));
174 status
= NT_STATUS_LOCK_NOT_GRANTED
;
178 if (!g_lock_parse(talloc_tos(), rec
->value
, &num_locks
, &locks
)) {
179 DEBUG(10, ("g_lock_parse for %s failed\n", name
));
180 status
= NT_STATUS_INTERNAL_ERROR
;
184 self
= procid_self();
187 for (i
=0; i
<num_locks
; i
++) {
188 if (procid_equal(&self
, &locks
[i
].pid
)) {
189 if (our_index
!= -1) {
190 DEBUG(1, ("g_lock_trylock: Added ourself "
192 status
= NT_STATUS_INTERNAL_ERROR
;
195 if ((locks
[i
].lock_type
& G_LOCK_PENDING
) == 0) {
196 DEBUG(1, ("g_lock_trylock: Found ourself not "
198 status
= NT_STATUS_INTERNAL_ERROR
;
204 /* never conflict with ourself */
207 if (g_lock_conflicts(lock_type
, &locks
[i
])) {
208 struct server_id pid
= locks
[i
].pid
;
210 if (!process_exists(pid
)) {
213 status
= g_lock_force_unlock(ctx
, name
, pid
);
214 if (!NT_STATUS_IS_OK(status
)) {
215 DEBUG(1, ("Could not unlock dead lock "
221 lock_type
|= G_LOCK_PENDING
;
225 if (our_index
== -1) {
226 /* First round, add ourself */
228 locks
= g_lock_addrec(talloc_tos(), locks
, num_locks
,
231 DEBUG(10, ("g_lock_addrec failed\n"));
232 status
= NT_STATUS_NO_MEMORY
;
237 * Retry. We were pending last time. Overwrite the
238 * stored lock_type with what we calculated, we might
239 * have acquired the lock this time.
241 locks
[our_index
].lock_type
= lock_type
;
244 data
= make_tdb_data((uint8_t *)locks
, talloc_get_size(locks
));
245 store_status
= rec
->store(rec
, data
, 0);
246 if (!NT_STATUS_IS_OK(store_status
)) {
247 DEBUG(1, ("rec->store failed: %s\n",
248 nt_errstr(store_status
)));
249 status
= store_status
;
256 if (NT_STATUS_IS_OK(status
) && (lock_type
& G_LOCK_PENDING
) != 0) {
257 return STATUS_PENDING
;
263 NTSTATUS
g_lock_lock(struct g_lock_ctx
*ctx
, const char *name
,
264 enum g_lock_type lock_type
, struct timeval timeout
)
266 struct tevent_timer
*te
= NULL
;
269 struct timeval timeout_end
;
270 struct timeval timeout_remaining
;
271 struct timeval time_now
;
273 DEBUG(10, ("Trying to acquire lock %d for %s\n", (int)lock_type
,
276 if (lock_type
& ~1) {
277 DEBUG(1, ("Got invalid lock type %d for %s\n",
278 (int)lock_type
, name
));
279 return NT_STATUS_INVALID_PARAMETER
;
282 #ifdef CLUSTER_SUPPORT
283 if (lp_clustering()) {
284 status
= ctdb_watch_us(messaging_ctdbd_connection());
285 if (!NT_STATUS_IS_OK(status
)) {
286 DEBUG(10, ("could not register retry with ctdb: %s\n",
293 status
= messaging_register(ctx
->msg
, &retry
, MSG_DBWRAP_G_LOCK_RETRY
,
295 if (!NT_STATUS_IS_OK(status
)) {
296 DEBUG(10, ("messaging_register failed: %s\n",
301 time_now
= timeval_current();
302 timeout_end
= timeval_sum(&time_now
, &timeout
);
306 fd_set
*r_fds
= NULL
;
310 status
= g_lock_trylock(ctx
, name
, lock_type
);
311 if (NT_STATUS_IS_OK(status
)) {
312 DEBUG(10, ("Got lock %s\n", name
));
315 if (!NT_STATUS_EQUAL(status
, STATUS_PENDING
)) {
316 DEBUG(10, ("g_lock_trylock failed: %s\n",
321 DEBUG(10, ("g_lock_trylock: Did not get lock, waiting...\n"));
323 /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
324 * !!! HACK ALERT --- FIX ME !!!
325 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
326 * What we really want to do here is to react to
327 * MSG_DBWRAP_G_LOCK_RETRY messages that are either sent
328 * by a client doing g_lock_unlock or by ourselves when
329 * we receive a CTDB_SRVID_SAMBA_NOTIFY or
330 * CTDB_SRVID_RECONFIGURE message from ctdbd, i.e. when
331 * either a client holding a lock or a complete node
334 * Doing this properly involves calling tevent_loop_once(),
335 * but doing this here with the main ctdbd messaging context
336 * creates a nested event loop when g_lock_lock() is called
337 * from the main event loop, e.g. in a tcon_and_X where the
338 * share_info.tdb needs to be initialized and is locked by
339 * another process, or when the remote registry is accessed
340 * for writing and some other process already holds a lock
341 * on the registry.tdb.
343 * So as a quick fix, we act a little coarsely here: we do
344 * a select on the ctdb connection fd and when it is readable
345 * or we get EINTR, then we retry without actually parsing
346 * any ctdb packets or dispatching messages. This means that
347 * we retry more often than intended by design, but this does
348 * no harm and it is unobtrusive. When we have finished,
349 * the main loop will pick up all the messages and ctdb
350 * packets. The only extra twist is that we cannot use timed
351 * events here but have to handcode a timeout.
354 #ifdef CLUSTER_SUPPORT
355 if (lp_clustering()) {
356 struct ctdbd_connection
*conn
= messaging_ctdbd_connection();
360 max_fd
= ctdbd_conn_get_fd(conn
);
361 FD_SET(max_fd
, r_fds
);
365 time_now
= timeval_current();
366 timeout_remaining
= timeval_until(&time_now
, &timeout_end
);
368 ret
= sys_select(max_fd
+ 1, r_fds
, NULL
, NULL
,
372 if (errno
!= EINTR
) {
373 DEBUG(1, ("error calling select: %s\n",
375 status
= NT_STATUS_INTERNAL_ERROR
;
380 * This means a signal was received.
381 * It might have been a MSG_DBWRAP_G_LOCK_RETRY message.
384 } else if (ret
== 0) {
385 if (timeval_expired(&timeout_end
)) {
386 DEBUG(10, ("g_lock_lock timed out\n"));
387 status
= NT_STATUS_LOCK_NOT_GRANTED
;
390 DEBUG(10, ("select returned 0 but timeout not "
391 "not expired: strange - retrying\n"));
393 } else if (ret
!= 1) {
394 DEBUG(1, ("invalid return code of select: %d\n", ret
));
395 status
= NT_STATUS_INTERNAL_ERROR
;
400 * This means ctdbd has sent us some data.
401 * Might be a CTDB_SRVID_RECONFIGURE or a
402 * CTDB_SRVID_SAMBA_NOTIFY message.
409 if (!NT_STATUS_IS_OK(status
)) {
410 NTSTATUS unlock_status
;
412 unlock_status
= g_lock_unlock(ctx
, name
);
414 if (!NT_STATUS_IS_OK(unlock_status
)) {
415 DEBUG(1, ("Could not remove ourself from the locking "
416 "db: %s\n", nt_errstr(status
)));
420 messaging_deregister(ctx
->msg
, MSG_DBWRAP_G_LOCK_RETRY
, &retry
);
426 static void g_lock_got_retry(struct messaging_context
*msg
,
429 struct server_id server_id
,
432 bool *pretry
= (bool *)private_data
;
434 DEBUG(10, ("Got retry message from pid %s\n",
435 procid_str(talloc_tos(), &server_id
)));
440 static void g_lock_timedout(struct tevent_context
*ev
,
441 struct tevent_timer
*te
,
442 struct timeval current_time
,
445 bool *ptimedout
= (bool *)private_data
;
450 static NTSTATUS
g_lock_force_unlock(struct g_lock_ctx
*ctx
, const char *name
,
451 struct server_id pid
)
453 struct db_record
*rec
= NULL
;
454 struct g_lock_rec
*locks
= NULL
;
456 enum g_lock_type lock_type
;
459 rec
= ctx
->db
->fetch_locked(ctx
->db
, talloc_tos(),
460 string_term_tdb_data(name
));
462 DEBUG(10, ("fetch_locked(\"%s\") failed\n", name
));
463 status
= NT_STATUS_INTERNAL_ERROR
;
467 if (!g_lock_parse(talloc_tos(), rec
->value
, &num_locks
, &locks
)) {
468 DEBUG(10, ("g_lock_parse for %s failed\n", name
));
469 status
= NT_STATUS_INTERNAL_ERROR
;
473 for (i
=0; i
<num_locks
; i
++) {
474 if (procid_equal(&pid
, &locks
[i
].pid
)) {
479 if (i
== num_locks
) {
480 DEBUG(10, ("g_lock_force_unlock: Lock not found\n"));
481 status
= NT_STATUS_INTERNAL_ERROR
;
485 lock_type
= locks
[i
].lock_type
;
487 if (i
< (num_locks
-1)) {
488 locks
[i
] = locks
[num_locks
-1];
492 if (num_locks
== 0) {
493 status
= rec
->delete_rec(rec
);
496 data
= make_tdb_data((uint8_t *)locks
,
497 sizeof(struct g_lock_rec
) * num_locks
);
498 status
= rec
->store(rec
, data
, 0);
501 if (!NT_STATUS_IS_OK(status
)) {
502 DEBUG(1, ("g_lock_force_unlock: Could not store record: %s\n",
507 if ((lock_type
& G_LOCK_PENDING
) == 0) {
509 * We've been the lock holder. Tell all others to retry.
511 for (i
=0; i
<num_locks
; i
++) {
512 if ((locks
[i
].lock_type
& G_LOCK_PENDING
) == 0) {
517 * Ping all waiters to retry
519 status
= messaging_send(ctx
->msg
, locks
[i
].pid
,
520 MSG_DBWRAP_G_LOCK_RETRY
,
522 if (!NT_STATUS_IS_OK(status
)) {
523 DEBUG(1, ("sending retry to %s failed: %s\n",
524 procid_str(talloc_tos(),
537 NTSTATUS
g_lock_unlock(struct g_lock_ctx
*ctx
, const char *name
)
541 status
= g_lock_force_unlock(ctx
, name
, procid_self());
543 #ifdef CLUSTER_SUPPORT
544 if (lp_clustering()) {
545 ctdb_unwatch(messaging_ctdbd_connection());
551 struct g_lock_locks_state
{
552 int (*fn
)(const char *name
, void *private_data
);
556 static int g_lock_locks_fn(struct db_record
*rec
, void *priv
)
558 struct g_lock_locks_state
*state
= (struct g_lock_locks_state
*)priv
;
560 if ((rec
->key
.dsize
== 0) || (rec
->key
.dptr
[rec
->key
.dsize
-1] != 0)) {
561 DEBUG(1, ("invalid key in g_lock.tdb, ignoring\n"));
564 return state
->fn((char *)rec
->key
.dptr
, state
->private_data
);
567 int g_lock_locks(struct g_lock_ctx
*ctx
,
568 int (*fn
)(const char *name
, void *private_data
),
571 struct g_lock_locks_state state
;
574 state
.private_data
= private_data
;
576 return ctx
->db
->traverse_read(ctx
->db
, g_lock_locks_fn
, &state
);
579 NTSTATUS
g_lock_dump(struct g_lock_ctx
*ctx
, const char *name
,
580 int (*fn
)(struct server_id pid
,
581 enum g_lock_type lock_type
,
587 struct g_lock_rec
*locks
= NULL
;
590 if (ctx
->db
->fetch(ctx
->db
, talloc_tos(), string_term_tdb_data(name
),
592 return NT_STATUS_NOT_FOUND
;
595 if ((data
.dsize
== 0) || (data
.dptr
== NULL
)) {
599 ret
= g_lock_parse(talloc_tos(), data
, &num_locks
, &locks
);
601 TALLOC_FREE(data
.dptr
);
604 DEBUG(10, ("g_lock_parse for %s failed\n", name
));
605 return NT_STATUS_INTERNAL_ERROR
;
608 for (i
=0; i
<num_locks
; i
++) {
609 if (fn(locks
[i
].pid
, locks
[i
].lock_type
, private_data
) != 0) {
617 struct g_lock_get_state
{
619 struct server_id
*pid
;
622 static int g_lock_get_fn(struct server_id pid
, enum g_lock_type lock_type
,
625 struct g_lock_get_state
*state
= (struct g_lock_get_state
*)priv
;
627 if ((lock_type
& G_LOCK_PENDING
) != 0) {
636 NTSTATUS
g_lock_get(struct g_lock_ctx
*ctx
, const char *name
,
637 struct server_id
*pid
)
639 struct g_lock_get_state state
;
645 status
= g_lock_dump(ctx
, name
, g_lock_get_fn
, &state
);
646 if (!NT_STATUS_IS_OK(status
)) {
650 return NT_STATUS_NOT_FOUND
;