2 Unix SMB/CIFS implementation.
4 generic byte range locking code - tdb backend
6 Copyright (C) Andrew Tridgell 1992-2006
7 Copyright (C) Jeremy Allison 1992-2000
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3 of the License, or
12 (at your option) any later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program. If not, see <http://www.gnu.org/licenses/>.
23 /* This module implements a tdb based byte range locking service,
24 replacing the fcntl() based byte range locking previously
25 used. This allows us to provide the same semantics as NT */
28 #include "system/filesys.h"
29 #include "../tdb/include/tdb.h"
30 #include "messaging/messaging.h"
32 #include "lib/messaging/irpc.h"
33 #include "libcli/libcli.h"
34 #include "cluster/cluster.h"
35 #include "ntvfs/common/brlock.h"
36 #include "ntvfs/ntvfs.h"
37 #include "param/param.h"
40 in this module a "DATA_BLOB *file_key" is a blob that uniquely identifies
41 a file. For a local posix filesystem this will usually be a combination
42 of the device and inode numbers of the file, but it can be anything
43 that uniquely identifies a file for locking purposes, as long
44 as it is applied consistently.
47 /* this struct is typically attached to tcon */
50 struct server_id server
;
51 struct messaging_context
*messaging_ctx
;
55 the lock context contains the elements that define whether one
56 lock is the same as another lock
59 struct server_id server
;
61 struct brl_context
*ctx
;
64 /* The data in brlock records is an unsorted linear array of these
65 records. It is unnecessary to store the count as tdb provides the
68 struct lock_context context
;
69 struct ntvfs_handle
*ntvfs
;
72 enum brl_type lock_type
;
76 /* this struct is attached to an open file handle */
79 struct ntvfs_handle
*ntvfs
;
80 struct lock_struct last_lock
;
84 Open up the brlock.tdb database. Close it down using
85 talloc_free(). We need the messaging_ctx to allow for
86 pending lock notifications.
88 static struct brl_context
*brl_tdb_init(TALLOC_CTX
*mem_ctx
, struct server_id server
,
89 struct loadparm_context
*lp_ctx
,
90 struct messaging_context
*messaging_ctx
)
92 struct brl_context
*brl
;
94 brl
= talloc(mem_ctx
, struct brl_context
);
99 brl
->w
= cluster_tdb_tmp_open(brl
, lp_ctx
, "brlock.tdb", TDB_DEFAULT
);
100 if (brl
->w
== NULL
) {
105 brl
->server
= server
;
106 brl
->messaging_ctx
= messaging_ctx
;
111 static struct brl_handle
*brl_tdb_create_handle(TALLOC_CTX
*mem_ctx
, struct ntvfs_handle
*ntvfs
,
114 struct brl_handle
*brlh
;
116 brlh
= talloc(mem_ctx
, struct brl_handle
);
121 brlh
->key
= *file_key
;
123 ZERO_STRUCT(brlh
->last_lock
);
129 see if two locking contexts are equal
131 static bool brl_tdb_same_context(struct lock_context
*ctx1
, struct lock_context
*ctx2
)
133 return (cluster_id_equal(&ctx1
->server
, &ctx2
->server
) &&
134 ctx1
->smbpid
== ctx2
->smbpid
&&
135 ctx1
->ctx
== ctx2
->ctx
);
139 see if lck1 and lck2 overlap
141 lck1 is the existing lock. lck2 is the new lock we are
144 static bool brl_tdb_overlap(struct lock_struct
*lck1
,
145 struct lock_struct
*lck2
)
147 /* this extra check is not redundent - it copes with locks
148 that go beyond the end of 64 bit file space */
149 if (lck1
->size
!= 0 &&
150 lck1
->start
== lck2
->start
&&
151 lck1
->size
== lck2
->size
) {
155 if (lck1
->start
>= (lck2
->start
+lck2
->size
) ||
156 lck2
->start
>= (lck1
->start
+lck1
->size
)) {
160 /* we have a conflict. Now check to see if lck1 really still
161 * exists, which involves checking if the process still
162 * exists. We leave this test to last as its the most
163 * expensive test, especially when we are clustered */
164 /* TODO: need to do this via a server_id_exists() call, which
165 * hasn't been written yet. When clustered this will need to
172 See if lock2 can be added when lock1 is in place.
174 static bool brl_tdb_conflict(struct lock_struct
*lck1
,
175 struct lock_struct
*lck2
)
177 /* pending locks don't conflict with anything */
178 if (lck1
->lock_type
>= PENDING_READ_LOCK
||
179 lck2
->lock_type
>= PENDING_READ_LOCK
) {
183 if (lck1
->lock_type
== READ_LOCK
&& lck2
->lock_type
== READ_LOCK
) {
187 if (brl_tdb_same_context(&lck1
->context
, &lck2
->context
) &&
188 lck2
->lock_type
== READ_LOCK
&& lck1
->ntvfs
== lck2
->ntvfs
) {
192 return brl_tdb_overlap(lck1
, lck2
);
197 Check to see if this lock conflicts, but ignore our own locks on the
200 static bool brl_tdb_conflict_other(struct lock_struct
*lck1
, struct lock_struct
*lck2
)
202 /* pending locks don't conflict with anything */
203 if (lck1
->lock_type
>= PENDING_READ_LOCK
||
204 lck2
->lock_type
>= PENDING_READ_LOCK
) {
208 if (lck1
->lock_type
== READ_LOCK
&& lck2
->lock_type
== READ_LOCK
)
212 * note that incoming write calls conflict with existing READ
213 * locks even if the context is the same. JRA. See LOCKTEST7
216 if (brl_tdb_same_context(&lck1
->context
, &lck2
->context
) &&
217 lck1
->ntvfs
== lck2
->ntvfs
&&
218 (lck2
->lock_type
== READ_LOCK
|| lck1
->lock_type
== WRITE_LOCK
)) {
222 return brl_tdb_overlap(lck1
, lck2
);
227 amazingly enough, w2k3 "remembers" whether the last lock failure
228 is the same as this one and changes its error code. I wonder if any
231 static NTSTATUS
brl_tdb_lock_failed(struct brl_handle
*brlh
, struct lock_struct
*lock
)
234 * this function is only called for non pending lock!
237 /* in SMB2 mode always return NT_STATUS_LOCK_NOT_GRANTED! */
238 if (lock
->ntvfs
->ctx
->protocol
== PROTOCOL_SMB2
) {
239 return NT_STATUS_LOCK_NOT_GRANTED
;
243 * if the notify_ptr is non NULL,
244 * it means that we're at the end of a pending lock
245 * and the real lock is requested after the timout went by
246 * In this case we need to remember the last_lock and always
247 * give FILE_LOCK_CONFLICT
249 if (lock
->notify_ptr
) {
250 brlh
->last_lock
= *lock
;
251 return NT_STATUS_FILE_LOCK_CONFLICT
;
255 * amazing the little things you learn with a test
256 * suite. Locks beyond this offset (as a 64 bit
257 * number!) always generate the conflict error code,
258 * unless the top bit is set
260 if (lock
->start
>= 0xEF000000 && (lock
->start
>> 63) == 0) {
261 brlh
->last_lock
= *lock
;
262 return NT_STATUS_FILE_LOCK_CONFLICT
;
266 * if the current lock matches the last failed lock on the file handle
267 * and starts at the same offset, then FILE_LOCK_CONFLICT should be returned
269 if (cluster_id_equal(&lock
->context
.server
, &brlh
->last_lock
.context
.server
) &&
270 lock
->context
.ctx
== brlh
->last_lock
.context
.ctx
&&
271 lock
->ntvfs
== brlh
->last_lock
.ntvfs
&&
272 lock
->start
== brlh
->last_lock
.start
) {
273 return NT_STATUS_FILE_LOCK_CONFLICT
;
276 brlh
->last_lock
= *lock
;
277 return NT_STATUS_LOCK_NOT_GRANTED
;
281 Lock a range of bytes. The lock_type can be a PENDING_*_LOCK, in
282 which case a real lock is first tried, and if that fails then a
283 pending lock is created. When the pending lock is triggered (by
284 someone else closing an overlapping lock range) a messaging
285 notification is sent, identified by the notify_ptr
287 static NTSTATUS
brl_tdb_lock(struct brl_context
*brl
,
288 struct brl_handle
*brlh
,
290 uint64_t start
, uint64_t size
,
291 enum brl_type lock_type
,
296 struct lock_struct lock
, *locks
=NULL
;
299 kbuf
.dptr
= brlh
->key
.data
;
300 kbuf
.dsize
= brlh
->key
.length
;
302 if (tdb_chainlock(brl
->w
->tdb
, kbuf
) != 0) {
303 return NT_STATUS_INTERNAL_DB_CORRUPTION
;
306 /* if this is a pending lock, then with the chainlock held we
307 try to get the real lock. If we succeed then we don't need
308 to make it pending. This prevents a possible race condition
309 where the pending lock gets created after the lock that is
310 preventing the real lock gets removed */
311 if (lock_type
>= PENDING_READ_LOCK
) {
312 enum brl_type rw
= (lock_type
==PENDING_READ_LOCK
? READ_LOCK
: WRITE_LOCK
);
314 /* here we need to force that the last_lock isn't overwritten */
315 lock
= brlh
->last_lock
;
316 status
= brl_tdb_lock(brl
, brlh
, smbpid
, start
, size
, rw
, NULL
);
317 brlh
->last_lock
= lock
;
319 if (NT_STATUS_IS_OK(status
)) {
320 tdb_chainunlock(brl
->w
->tdb
, kbuf
);
325 dbuf
= tdb_fetch(brl
->w
->tdb
, kbuf
);
327 lock
.context
.smbpid
= smbpid
;
328 lock
.context
.server
= brl
->server
;
329 lock
.context
.ctx
= brl
;
330 lock
.ntvfs
= brlh
->ntvfs
;
331 lock
.context
.ctx
= brl
;
334 lock
.lock_type
= lock_type
;
335 lock
.notify_ptr
= notify_ptr
;
338 /* there are existing locks - make sure they don't conflict */
339 locks
= (struct lock_struct
*)dbuf
.dptr
;
340 count
= dbuf
.dsize
/ sizeof(*locks
);
341 for (i
=0; i
<count
; i
++) {
342 if (brl_tdb_conflict(&locks
[i
], &lock
)) {
343 status
= brl_tdb_lock_failed(brlh
, &lock
);
349 /* no conflicts - add it to the list of locks */
350 locks
= realloc_p(locks
, struct lock_struct
, count
+1);
352 status
= NT_STATUS_NO_MEMORY
;
355 dbuf
.dptr
= (uint8_t *)locks
;
358 dbuf
.dsize
+= sizeof(lock
);
360 if (tdb_store(brl
->w
->tdb
, kbuf
, dbuf
, TDB_REPLACE
) != 0) {
361 status
= NT_STATUS_INTERNAL_DB_CORRUPTION
;
366 tdb_chainunlock(brl
->w
->tdb
, kbuf
);
368 /* the caller needs to know if the real lock was granted. If
369 we have reached here then it must be a pending lock that
370 was granted, so tell them the lock failed */
371 if (lock_type
>= PENDING_READ_LOCK
) {
372 return NT_STATUS_LOCK_NOT_GRANTED
;
380 tdb_chainunlock(brl
->w
->tdb
, kbuf
);
386 we are removing a lock that might be holding up a pending lock. Scan for pending
387 locks that cover this range and if we find any then notify the server that it should
390 static void brl_tdb_notify_unlock(struct brl_context
*brl
,
391 struct lock_struct
*locks
, int count
,
392 struct lock_struct
*removed_lock
)
396 /* the last_notice logic is to prevent stampeding on a lock
397 range. It prevents us sending hundreds of notifies on the
398 same range of bytes. It doesn't prevent all possible
399 stampedes, but it does prevent the most common problem */
402 for (i
=0;i
<count
;i
++) {
403 if (locks
[i
].lock_type
>= PENDING_READ_LOCK
&&
404 brl_tdb_overlap(&locks
[i
], removed_lock
)) {
405 if (last_notice
!= -1 && brl_tdb_overlap(&locks
[i
], &locks
[last_notice
])) {
408 if (locks
[i
].lock_type
== PENDING_WRITE_LOCK
) {
411 messaging_send_ptr(brl
->messaging_ctx
, locks
[i
].context
.server
,
412 MSG_BRL_RETRY
, locks
[i
].notify_ptr
);
419 send notifications for all pending locks - the file is being closed by this
422 static void brl_tdb_notify_all(struct brl_context
*brl
,
423 struct lock_struct
*locks
, int count
)
426 for (i
=0;i
<count
;i
++) {
427 if (locks
->lock_type
>= PENDING_READ_LOCK
) {
428 brl_tdb_notify_unlock(brl
, locks
, count
, &locks
[i
]);
436 Unlock a range of bytes.
438 static NTSTATUS
brl_tdb_unlock(struct brl_context
*brl
,
439 struct brl_handle
*brlh
,
441 uint64_t start
, uint64_t size
)
445 struct lock_struct
*locks
, *lock
;
446 struct lock_context context
;
449 kbuf
.dptr
= brlh
->key
.data
;
450 kbuf
.dsize
= brlh
->key
.length
;
452 if (tdb_chainlock(brl
->w
->tdb
, kbuf
) != 0) {
453 return NT_STATUS_INTERNAL_DB_CORRUPTION
;
456 dbuf
= tdb_fetch(brl
->w
->tdb
, kbuf
);
458 tdb_chainunlock(brl
->w
->tdb
, kbuf
);
459 return NT_STATUS_RANGE_NOT_LOCKED
;
462 context
.smbpid
= smbpid
;
463 context
.server
= brl
->server
;
466 /* there are existing locks - find a match */
467 locks
= (struct lock_struct
*)dbuf
.dptr
;
468 count
= dbuf
.dsize
/ sizeof(*locks
);
470 for (i
=0; i
<count
; i
++) {
472 if (brl_tdb_same_context(&lock
->context
, &context
) &&
473 lock
->ntvfs
== brlh
->ntvfs
&&
474 lock
->start
== start
&&
475 lock
->size
== size
&&
476 lock
->lock_type
== WRITE_LOCK
) {
480 if (i
< count
) goto found
;
482 for (i
=0; i
<count
; i
++) {
484 if (brl_tdb_same_context(&lock
->context
, &context
) &&
485 lock
->ntvfs
== brlh
->ntvfs
&&
486 lock
->start
== start
&&
487 lock
->size
== size
&&
488 lock
->lock_type
< PENDING_READ_LOCK
) {
495 /* found it - delete it */
497 if (tdb_delete(brl
->w
->tdb
, kbuf
) != 0) {
498 status
= NT_STATUS_INTERNAL_DB_CORRUPTION
;
502 struct lock_struct removed_lock
= *lock
;
504 memmove(&locks
[i
], &locks
[i
+1],
505 sizeof(*locks
)*((count
-1) - i
));
509 /* send notifications for any relevant pending locks */
510 brl_tdb_notify_unlock(brl
, locks
, count
, &removed_lock
);
512 dbuf
.dsize
= count
* sizeof(*locks
);
514 if (tdb_store(brl
->w
->tdb
, kbuf
, dbuf
, TDB_REPLACE
) != 0) {
515 status
= NT_STATUS_INTERNAL_DB_CORRUPTION
;
521 tdb_chainunlock(brl
->w
->tdb
, kbuf
);
525 /* we didn't find it */
526 status
= NT_STATUS_RANGE_NOT_LOCKED
;
530 tdb_chainunlock(brl
->w
->tdb
, kbuf
);
536 remove a pending lock. This is called when the caller has either
537 given up trying to establish a lock or when they have succeeded in
538 getting it. In either case they no longer need to be notified.
540 static NTSTATUS
brl_tdb_remove_pending(struct brl_context
*brl
,
541 struct brl_handle
*brlh
,
546 struct lock_struct
*locks
;
549 kbuf
.dptr
= brlh
->key
.data
;
550 kbuf
.dsize
= brlh
->key
.length
;
552 if (tdb_chainlock(brl
->w
->tdb
, kbuf
) != 0) {
553 return NT_STATUS_INTERNAL_DB_CORRUPTION
;
556 dbuf
= tdb_fetch(brl
->w
->tdb
, kbuf
);
558 tdb_chainunlock(brl
->w
->tdb
, kbuf
);
559 return NT_STATUS_RANGE_NOT_LOCKED
;
562 /* there are existing locks - find a match */
563 locks
= (struct lock_struct
*)dbuf
.dptr
;
564 count
= dbuf
.dsize
/ sizeof(*locks
);
566 for (i
=0; i
<count
; i
++) {
567 struct lock_struct
*lock
= &locks
[i
];
569 if (lock
->lock_type
>= PENDING_READ_LOCK
&&
570 lock
->notify_ptr
== notify_ptr
&&
571 cluster_id_equal(&lock
->context
.server
, &brl
->server
)) {
572 /* found it - delete it */
574 if (tdb_delete(brl
->w
->tdb
, kbuf
) != 0) {
575 status
= NT_STATUS_INTERNAL_DB_CORRUPTION
;
580 memmove(&locks
[i
], &locks
[i
+1],
581 sizeof(*locks
)*((count
-1) - i
));
584 dbuf
.dsize
= count
* sizeof(*locks
);
585 if (tdb_store(brl
->w
->tdb
, kbuf
, dbuf
, TDB_REPLACE
) != 0) {
586 status
= NT_STATUS_INTERNAL_DB_CORRUPTION
;
592 tdb_chainunlock(brl
->w
->tdb
, kbuf
);
597 /* we didn't find it */
598 status
= NT_STATUS_RANGE_NOT_LOCKED
;
602 tdb_chainunlock(brl
->w
->tdb
, kbuf
);
608 Test if we are allowed to perform IO on a region of an open file
610 static NTSTATUS
brl_tdb_locktest(struct brl_context
*brl
,
611 struct brl_handle
*brlh
,
613 uint64_t start
, uint64_t size
,
614 enum brl_type lock_type
)
618 struct lock_struct lock
, *locks
;
620 kbuf
.dptr
= brlh
->key
.data
;
621 kbuf
.dsize
= brlh
->key
.length
;
623 dbuf
= tdb_fetch(brl
->w
->tdb
, kbuf
);
624 if (dbuf
.dptr
== NULL
) {
628 lock
.context
.smbpid
= smbpid
;
629 lock
.context
.server
= brl
->server
;
630 lock
.context
.ctx
= brl
;
631 lock
.ntvfs
= brlh
->ntvfs
;
634 lock
.lock_type
= lock_type
;
636 /* there are existing locks - make sure they don't conflict */
637 locks
= (struct lock_struct
*)dbuf
.dptr
;
638 count
= dbuf
.dsize
/ sizeof(*locks
);
640 for (i
=0; i
<count
; i
++) {
641 if (brl_tdb_conflict_other(&locks
[i
], &lock
)) {
643 return NT_STATUS_FILE_LOCK_CONFLICT
;
653 Remove any locks associated with a open file.
655 static NTSTATUS
brl_tdb_close(struct brl_context
*brl
,
656 struct brl_handle
*brlh
)
659 int count
, i
, dcount
=0;
660 struct lock_struct
*locks
;
663 kbuf
.dptr
= brlh
->key
.data
;
664 kbuf
.dsize
= brlh
->key
.length
;
666 if (tdb_chainlock(brl
->w
->tdb
, kbuf
) != 0) {
667 return NT_STATUS_INTERNAL_DB_CORRUPTION
;
670 dbuf
= tdb_fetch(brl
->w
->tdb
, kbuf
);
672 tdb_chainunlock(brl
->w
->tdb
, kbuf
);
676 /* there are existing locks - remove any for this fnum */
677 locks
= (struct lock_struct
*)dbuf
.dptr
;
678 count
= dbuf
.dsize
/ sizeof(*locks
);
680 for (i
=0; i
<count
; i
++) {
681 struct lock_struct
*lock
= &locks
[i
];
683 if (lock
->context
.ctx
== brl
&&
684 cluster_id_equal(&lock
->context
.server
, &brl
->server
) &&
685 lock
->ntvfs
== brlh
->ntvfs
) {
686 /* found it - delete it */
687 if (count
> 1 && i
< count
-1) {
688 memmove(&locks
[i
], &locks
[i
+1],
689 sizeof(*locks
)*((count
-1) - i
));
697 status
= NT_STATUS_OK
;
700 if (tdb_delete(brl
->w
->tdb
, kbuf
) != 0) {
701 status
= NT_STATUS_INTERNAL_DB_CORRUPTION
;
703 } else if (dcount
!= 0) {
704 /* tell all pending lock holders for this file that
705 they have a chance now. This is a bit indiscriminate,
707 brl_tdb_notify_all(brl
, locks
, count
);
709 dbuf
.dsize
= count
* sizeof(*locks
);
711 if (tdb_store(brl
->w
->tdb
, kbuf
, dbuf
, TDB_REPLACE
) != 0) {
712 status
= NT_STATUS_INTERNAL_DB_CORRUPTION
;
717 tdb_chainunlock(brl
->w
->tdb
, kbuf
);
723 static const struct brlock_ops brlock_tdb_ops
= {
724 .brl_init
= brl_tdb_init
,
725 .brl_create_handle
= brl_tdb_create_handle
,
726 .brl_lock
= brl_tdb_lock
,
727 .brl_unlock
= brl_tdb_unlock
,
728 .brl_remove_pending
= brl_tdb_remove_pending
,
729 .brl_locktest
= brl_tdb_locktest
,
730 .brl_close
= brl_tdb_close
734 void brl_tdb_init_ops(void)
736 brl_set_ops(&brlock_tdb_ops
);