Oops. Don't break the build..
[Samba.git] / source4 / ntvfs / common / brlock_tdb.c
blob299400b96c40fa6917a15407e0aa722be469f2c7
1 /*
2 Unix SMB/CIFS implementation.
4 generic byte range locking code - tdb backend
6 Copyright (C) Andrew Tridgell 1992-2006
7 Copyright (C) Jeremy Allison 1992-2000
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3 of the License, or
12 (at your option) any later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program. If not, see <http://www.gnu.org/licenses/>.
23 /* This module implements a tdb based byte range locking service,
24 replacing the fcntl() based byte range locking previously
25 used. This allows us to provide the same semantics as NT */
27 #include "includes.h"
28 #include "system/filesys.h"
29 #include "../tdb/include/tdb.h"
30 #include "messaging/messaging.h"
31 #include "tdb_wrap.h"
32 #include "lib/messaging/irpc.h"
33 #include "libcli/libcli.h"
34 #include "cluster/cluster.h"
35 #include "ntvfs/common/brlock.h"
36 #include "ntvfs/ntvfs.h"
37 #include "param/param.h"
/*
  In this module a "DATA_BLOB *file_key" is a blob that uniquely identifies
  a file. For a local posix filesystem this will usually be a combination
  of the device and inode numbers of the file, but it can be anything
  that uniquely identifies a file for locking purposes, as long
  as it is applied consistently.
*/
47 /* this struct is typicaly attached to tcon */
48 struct brl_context {
49 struct tdb_wrap *w;
50 struct server_id server;
51 struct messaging_context *messaging_ctx;
55 the lock context contains the elements that define whether one
56 lock is the same as another lock
58 struct lock_context {
59 struct server_id server;
60 uint32_t smbpid;
61 struct brl_context *ctx;
64 /* The data in brlock records is an unsorted linear array of these
65 records. It is unnecessary to store the count as tdb provides the
66 size of the record */
67 struct lock_struct {
68 struct lock_context context;
69 struct ntvfs_handle *ntvfs;
70 uint64_t start;
71 uint64_t size;
72 enum brl_type lock_type;
73 void *notify_ptr;
76 /* this struct is attached to on oprn file handle */
77 struct brl_handle {
78 DATA_BLOB key;
79 struct ntvfs_handle *ntvfs;
80 struct lock_struct last_lock;
84 Open up the brlock.tdb database. Close it down using
85 talloc_free(). We need the messaging_ctx to allow for
86 pending lock notifications.
88 static struct brl_context *brl_tdb_init(TALLOC_CTX *mem_ctx, struct server_id server,
89 struct loadparm_context *lp_ctx,
90 struct messaging_context *messaging_ctx)
92 struct brl_context *brl;
94 brl = talloc(mem_ctx, struct brl_context);
95 if (brl == NULL) {
96 return NULL;
99 brl->w = cluster_tdb_tmp_open(brl, lp_ctx, "brlock.tdb", TDB_DEFAULT);
100 if (brl->w == NULL) {
101 talloc_free(brl);
102 return NULL;
105 brl->server = server;
106 brl->messaging_ctx = messaging_ctx;
108 return brl;
111 static struct brl_handle *brl_tdb_create_handle(TALLOC_CTX *mem_ctx, struct ntvfs_handle *ntvfs,
112 DATA_BLOB *file_key)
114 struct brl_handle *brlh;
116 brlh = talloc(mem_ctx, struct brl_handle);
117 if (brlh == NULL) {
118 return NULL;
121 brlh->key = *file_key;
122 brlh->ntvfs = ntvfs;
123 ZERO_STRUCT(brlh->last_lock);
125 return brlh;
129 see if two locking contexts are equal
131 static bool brl_tdb_same_context(struct lock_context *ctx1, struct lock_context *ctx2)
133 return (cluster_id_equal(&ctx1->server, &ctx2->server) &&
134 ctx1->smbpid == ctx2->smbpid &&
135 ctx1->ctx == ctx2->ctx);
139 see if lck1 and lck2 overlap
141 lck1 is the existing lock. lck2 is the new lock we are
142 looking at adding
144 static bool brl_tdb_overlap(struct lock_struct *lck1,
145 struct lock_struct *lck2)
147 /* this extra check is not redundent - it copes with locks
148 that go beyond the end of 64 bit file space */
149 if (lck1->size != 0 &&
150 lck1->start == lck2->start &&
151 lck1->size == lck2->size) {
152 return true;
155 if (lck1->start >= (lck2->start+lck2->size) ||
156 lck2->start >= (lck1->start+lck1->size)) {
157 return false;
160 /* we have a conflict. Now check to see if lck1 really still
161 * exists, which involves checking if the process still
162 * exists. We leave this test to last as its the most
163 * expensive test, especially when we are clustered */
164 /* TODO: need to do this via a server_id_exists() call, which
165 * hasn't been written yet. When clustered this will need to
166 * call into ctdb */
168 return true;
172 See if lock2 can be added when lock1 is in place.
174 static bool brl_tdb_conflict(struct lock_struct *lck1,
175 struct lock_struct *lck2)
177 /* pending locks don't conflict with anything */
178 if (lck1->lock_type >= PENDING_READ_LOCK ||
179 lck2->lock_type >= PENDING_READ_LOCK) {
180 return false;
183 if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK) {
184 return false;
187 if (brl_tdb_same_context(&lck1->context, &lck2->context) &&
188 lck2->lock_type == READ_LOCK && lck1->ntvfs == lck2->ntvfs) {
189 return false;
192 return brl_tdb_overlap(lck1, lck2);
197 Check to see if this lock conflicts, but ignore our own locks on the
198 same fnum only.
200 static bool brl_tdb_conflict_other(struct lock_struct *lck1, struct lock_struct *lck2)
202 /* pending locks don't conflict with anything */
203 if (lck1->lock_type >= PENDING_READ_LOCK ||
204 lck2->lock_type >= PENDING_READ_LOCK) {
205 return false;
208 if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK)
209 return false;
212 * note that incoming write calls conflict with existing READ
213 * locks even if the context is the same. JRA. See LOCKTEST7
214 * in smbtorture.
216 if (brl_tdb_same_context(&lck1->context, &lck2->context) &&
217 lck1->ntvfs == lck2->ntvfs &&
218 (lck2->lock_type == READ_LOCK || lck1->lock_type == WRITE_LOCK)) {
219 return false;
222 return brl_tdb_overlap(lck1, lck2);
227 amazingly enough, w2k3 "remembers" whether the last lock failure
228 is the same as this one and changes its error code. I wonder if any
229 app depends on this?
231 static NTSTATUS brl_tdb_lock_failed(struct brl_handle *brlh, struct lock_struct *lock)
234 * this function is only called for non pending lock!
237 /* in SMB2 mode always return NT_STATUS_LOCK_NOT_GRANTED! */
238 if (lock->ntvfs->ctx->protocol == PROTOCOL_SMB2) {
239 return NT_STATUS_LOCK_NOT_GRANTED;
243 * if the notify_ptr is non NULL,
244 * it means that we're at the end of a pending lock
245 * and the real lock is requested after the timout went by
246 * In this case we need to remember the last_lock and always
247 * give FILE_LOCK_CONFLICT
249 if (lock->notify_ptr) {
250 brlh->last_lock = *lock;
251 return NT_STATUS_FILE_LOCK_CONFLICT;
255 * amazing the little things you learn with a test
256 * suite. Locks beyond this offset (as a 64 bit
257 * number!) always generate the conflict error code,
258 * unless the top bit is set
260 if (lock->start >= 0xEF000000 && (lock->start >> 63) == 0) {
261 brlh->last_lock = *lock;
262 return NT_STATUS_FILE_LOCK_CONFLICT;
266 * if the current lock matches the last failed lock on the file handle
267 * and starts at the same offset, then FILE_LOCK_CONFLICT should be returned
269 if (cluster_id_equal(&lock->context.server, &brlh->last_lock.context.server) &&
270 lock->context.ctx == brlh->last_lock.context.ctx &&
271 lock->ntvfs == brlh->last_lock.ntvfs &&
272 lock->start == brlh->last_lock.start) {
273 return NT_STATUS_FILE_LOCK_CONFLICT;
276 brlh->last_lock = *lock;
277 return NT_STATUS_LOCK_NOT_GRANTED;
281 Lock a range of bytes. The lock_type can be a PENDING_*_LOCK, in
282 which case a real lock is first tried, and if that fails then a
283 pending lock is created. When the pending lock is triggered (by
284 someone else closing an overlapping lock range) a messaging
285 notification is sent, identified by the notify_ptr
287 static NTSTATUS brl_tdb_lock(struct brl_context *brl,
288 struct brl_handle *brlh,
289 uint32_t smbpid,
290 uint64_t start, uint64_t size,
291 enum brl_type lock_type,
292 void *notify_ptr)
294 TDB_DATA kbuf, dbuf;
295 int count=0, i;
296 struct lock_struct lock, *locks=NULL;
297 NTSTATUS status;
299 kbuf.dptr = brlh->key.data;
300 kbuf.dsize = brlh->key.length;
302 if (tdb_chainlock(brl->w->tdb, kbuf) != 0) {
303 return NT_STATUS_INTERNAL_DB_CORRUPTION;
306 /* if this is a pending lock, then with the chainlock held we
307 try to get the real lock. If we succeed then we don't need
308 to make it pending. This prevents a possible race condition
309 where the pending lock gets created after the lock that is
310 preventing the real lock gets removed */
311 if (lock_type >= PENDING_READ_LOCK) {
312 enum brl_type rw = (lock_type==PENDING_READ_LOCK? READ_LOCK : WRITE_LOCK);
314 /* here we need to force that the last_lock isn't overwritten */
315 lock = brlh->last_lock;
316 status = brl_tdb_lock(brl, brlh, smbpid, start, size, rw, NULL);
317 brlh->last_lock = lock;
319 if (NT_STATUS_IS_OK(status)) {
320 tdb_chainunlock(brl->w->tdb, kbuf);
321 return NT_STATUS_OK;
325 dbuf = tdb_fetch(brl->w->tdb, kbuf);
327 lock.context.smbpid = smbpid;
328 lock.context.server = brl->server;
329 lock.context.ctx = brl;
330 lock.ntvfs = brlh->ntvfs;
331 lock.context.ctx = brl;
332 lock.start = start;
333 lock.size = size;
334 lock.lock_type = lock_type;
335 lock.notify_ptr = notify_ptr;
337 if (dbuf.dptr) {
338 /* there are existing locks - make sure they don't conflict */
339 locks = (struct lock_struct *)dbuf.dptr;
340 count = dbuf.dsize / sizeof(*locks);
341 for (i=0; i<count; i++) {
342 if (brl_tdb_conflict(&locks[i], &lock)) {
343 status = brl_tdb_lock_failed(brlh, &lock);
344 goto fail;
349 /* no conflicts - add it to the list of locks */
350 locks = realloc_p(locks, struct lock_struct, count+1);
351 if (!locks) {
352 status = NT_STATUS_NO_MEMORY;
353 goto fail;
354 } else {
355 dbuf.dptr = (uint8_t *)locks;
357 locks[count] = lock;
358 dbuf.dsize += sizeof(lock);
360 if (tdb_store(brl->w->tdb, kbuf, dbuf, TDB_REPLACE) != 0) {
361 status = NT_STATUS_INTERNAL_DB_CORRUPTION;
362 goto fail;
365 free(dbuf.dptr);
366 tdb_chainunlock(brl->w->tdb, kbuf);
368 /* the caller needs to know if the real lock was granted. If
369 we have reached here then it must be a pending lock that
370 was granted, so tell them the lock failed */
371 if (lock_type >= PENDING_READ_LOCK) {
372 return NT_STATUS_LOCK_NOT_GRANTED;
375 return NT_STATUS_OK;
377 fail:
379 free(dbuf.dptr);
380 tdb_chainunlock(brl->w->tdb, kbuf);
381 return status;
386 we are removing a lock that might be holding up a pending lock. Scan for pending
387 locks that cover this range and if we find any then notify the server that it should
388 retry the lock
390 static void brl_tdb_notify_unlock(struct brl_context *brl,
391 struct lock_struct *locks, int count,
392 struct lock_struct *removed_lock)
394 int i, last_notice;
396 /* the last_notice logic is to prevent stampeding on a lock
397 range. It prevents us sending hundreds of notifies on the
398 same range of bytes. It doesn't prevent all possible
399 stampedes, but it does prevent the most common problem */
400 last_notice = -1;
402 for (i=0;i<count;i++) {
403 if (locks[i].lock_type >= PENDING_READ_LOCK &&
404 brl_tdb_overlap(&locks[i], removed_lock)) {
405 if (last_notice != -1 && brl_tdb_overlap(&locks[i], &locks[last_notice])) {
406 continue;
408 if (locks[i].lock_type == PENDING_WRITE_LOCK) {
409 last_notice = i;
411 messaging_send_ptr(brl->messaging_ctx, locks[i].context.server,
412 MSG_BRL_RETRY, locks[i].notify_ptr);
419 send notifications for all pending locks - the file is being closed by this
420 user
422 static void brl_tdb_notify_all(struct brl_context *brl,
423 struct lock_struct *locks, int count)
425 int i;
426 for (i=0;i<count;i++) {
427 if (locks->lock_type >= PENDING_READ_LOCK) {
428 brl_tdb_notify_unlock(brl, locks, count, &locks[i]);
436 Unlock a range of bytes.
438 static NTSTATUS brl_tdb_unlock(struct brl_context *brl,
439 struct brl_handle *brlh,
440 uint32_t smbpid,
441 uint64_t start, uint64_t size)
443 TDB_DATA kbuf, dbuf;
444 int count, i;
445 struct lock_struct *locks, *lock;
446 struct lock_context context;
447 NTSTATUS status;
449 kbuf.dptr = brlh->key.data;
450 kbuf.dsize = brlh->key.length;
452 if (tdb_chainlock(brl->w->tdb, kbuf) != 0) {
453 return NT_STATUS_INTERNAL_DB_CORRUPTION;
456 dbuf = tdb_fetch(brl->w->tdb, kbuf);
457 if (!dbuf.dptr) {
458 tdb_chainunlock(brl->w->tdb, kbuf);
459 return NT_STATUS_RANGE_NOT_LOCKED;
462 context.smbpid = smbpid;
463 context.server = brl->server;
464 context.ctx = brl;
466 /* there are existing locks - find a match */
467 locks = (struct lock_struct *)dbuf.dptr;
468 count = dbuf.dsize / sizeof(*locks);
470 for (i=0; i<count; i++) {
471 lock = &locks[i];
472 if (brl_tdb_same_context(&lock->context, &context) &&
473 lock->ntvfs == brlh->ntvfs &&
474 lock->start == start &&
475 lock->size == size &&
476 lock->lock_type == WRITE_LOCK) {
477 break;
480 if (i < count) goto found;
482 for (i=0; i<count; i++) {
483 lock = &locks[i];
484 if (brl_tdb_same_context(&lock->context, &context) &&
485 lock->ntvfs == brlh->ntvfs &&
486 lock->start == start &&
487 lock->size == size &&
488 lock->lock_type < PENDING_READ_LOCK) {
489 break;
493 found:
494 if (i < count) {
495 /* found it - delete it */
496 if (count == 1) {
497 if (tdb_delete(brl->w->tdb, kbuf) != 0) {
498 status = NT_STATUS_INTERNAL_DB_CORRUPTION;
499 goto fail;
501 } else {
502 struct lock_struct removed_lock = *lock;
503 if (i < count-1) {
504 memmove(&locks[i], &locks[i+1],
505 sizeof(*locks)*((count-1) - i));
507 count--;
509 /* send notifications for any relevant pending locks */
510 brl_tdb_notify_unlock(brl, locks, count, &removed_lock);
512 dbuf.dsize = count * sizeof(*locks);
514 if (tdb_store(brl->w->tdb, kbuf, dbuf, TDB_REPLACE) != 0) {
515 status = NT_STATUS_INTERNAL_DB_CORRUPTION;
516 goto fail;
520 free(dbuf.dptr);
521 tdb_chainunlock(brl->w->tdb, kbuf);
522 return NT_STATUS_OK;
525 /* we didn't find it */
526 status = NT_STATUS_RANGE_NOT_LOCKED;
528 fail:
529 free(dbuf.dptr);
530 tdb_chainunlock(brl->w->tdb, kbuf);
531 return status;
536 remove a pending lock. This is called when the caller has either
537 given up trying to establish a lock or when they have succeeded in
538 getting it. In either case they no longer need to be notified.
540 static NTSTATUS brl_tdb_remove_pending(struct brl_context *brl,
541 struct brl_handle *brlh,
542 void *notify_ptr)
544 TDB_DATA kbuf, dbuf;
545 int count, i;
546 struct lock_struct *locks;
547 NTSTATUS status;
549 kbuf.dptr = brlh->key.data;
550 kbuf.dsize = brlh->key.length;
552 if (tdb_chainlock(brl->w->tdb, kbuf) != 0) {
553 return NT_STATUS_INTERNAL_DB_CORRUPTION;
556 dbuf = tdb_fetch(brl->w->tdb, kbuf);
557 if (!dbuf.dptr) {
558 tdb_chainunlock(brl->w->tdb, kbuf);
559 return NT_STATUS_RANGE_NOT_LOCKED;
562 /* there are existing locks - find a match */
563 locks = (struct lock_struct *)dbuf.dptr;
564 count = dbuf.dsize / sizeof(*locks);
566 for (i=0; i<count; i++) {
567 struct lock_struct *lock = &locks[i];
569 if (lock->lock_type >= PENDING_READ_LOCK &&
570 lock->notify_ptr == notify_ptr &&
571 cluster_id_equal(&lock->context.server, &brl->server)) {
572 /* found it - delete it */
573 if (count == 1) {
574 if (tdb_delete(brl->w->tdb, kbuf) != 0) {
575 status = NT_STATUS_INTERNAL_DB_CORRUPTION;
576 goto fail;
578 } else {
579 if (i < count-1) {
580 memmove(&locks[i], &locks[i+1],
581 sizeof(*locks)*((count-1) - i));
583 count--;
584 dbuf.dsize = count * sizeof(*locks);
585 if (tdb_store(brl->w->tdb, kbuf, dbuf, TDB_REPLACE) != 0) {
586 status = NT_STATUS_INTERNAL_DB_CORRUPTION;
587 goto fail;
591 free(dbuf.dptr);
592 tdb_chainunlock(brl->w->tdb, kbuf);
593 return NT_STATUS_OK;
597 /* we didn't find it */
598 status = NT_STATUS_RANGE_NOT_LOCKED;
600 fail:
601 free(dbuf.dptr);
602 tdb_chainunlock(brl->w->tdb, kbuf);
603 return status;
608 Test if we are allowed to perform IO on a region of an open file
610 static NTSTATUS brl_tdb_locktest(struct brl_context *brl,
611 struct brl_handle *brlh,
612 uint32_t smbpid,
613 uint64_t start, uint64_t size,
614 enum brl_type lock_type)
616 TDB_DATA kbuf, dbuf;
617 int count, i;
618 struct lock_struct lock, *locks;
620 kbuf.dptr = brlh->key.data;
621 kbuf.dsize = brlh->key.length;
623 dbuf = tdb_fetch(brl->w->tdb, kbuf);
624 if (dbuf.dptr == NULL) {
625 return NT_STATUS_OK;
628 lock.context.smbpid = smbpid;
629 lock.context.server = brl->server;
630 lock.context.ctx = brl;
631 lock.ntvfs = brlh->ntvfs;
632 lock.start = start;
633 lock.size = size;
634 lock.lock_type = lock_type;
636 /* there are existing locks - make sure they don't conflict */
637 locks = (struct lock_struct *)dbuf.dptr;
638 count = dbuf.dsize / sizeof(*locks);
640 for (i=0; i<count; i++) {
641 if (brl_tdb_conflict_other(&locks[i], &lock)) {
642 free(dbuf.dptr);
643 return NT_STATUS_FILE_LOCK_CONFLICT;
647 free(dbuf.dptr);
648 return NT_STATUS_OK;
653 Remove any locks associated with a open file.
655 static NTSTATUS brl_tdb_close(struct brl_context *brl,
656 struct brl_handle *brlh)
658 TDB_DATA kbuf, dbuf;
659 int count, i, dcount=0;
660 struct lock_struct *locks;
661 NTSTATUS status;
663 kbuf.dptr = brlh->key.data;
664 kbuf.dsize = brlh->key.length;
666 if (tdb_chainlock(brl->w->tdb, kbuf) != 0) {
667 return NT_STATUS_INTERNAL_DB_CORRUPTION;
670 dbuf = tdb_fetch(brl->w->tdb, kbuf);
671 if (!dbuf.dptr) {
672 tdb_chainunlock(brl->w->tdb, kbuf);
673 return NT_STATUS_OK;
676 /* there are existing locks - remove any for this fnum */
677 locks = (struct lock_struct *)dbuf.dptr;
678 count = dbuf.dsize / sizeof(*locks);
680 for (i=0; i<count; i++) {
681 struct lock_struct *lock = &locks[i];
683 if (lock->context.ctx == brl &&
684 cluster_id_equal(&lock->context.server, &brl->server) &&
685 lock->ntvfs == brlh->ntvfs) {
686 /* found it - delete it */
687 if (count > 1 && i < count-1) {
688 memmove(&locks[i], &locks[i+1],
689 sizeof(*locks)*((count-1) - i));
691 count--;
692 i--;
693 dcount++;
697 status = NT_STATUS_OK;
699 if (count == 0) {
700 if (tdb_delete(brl->w->tdb, kbuf) != 0) {
701 status = NT_STATUS_INTERNAL_DB_CORRUPTION;
703 } else if (dcount != 0) {
704 /* tell all pending lock holders for this file that
705 they have a chance now. This is a bit indiscriminant,
706 but works OK */
707 brl_tdb_notify_all(brl, locks, count);
709 dbuf.dsize = count * sizeof(*locks);
711 if (tdb_store(brl->w->tdb, kbuf, dbuf, TDB_REPLACE) != 0) {
712 status = NT_STATUS_INTERNAL_DB_CORRUPTION;
716 free(dbuf.dptr);
717 tdb_chainunlock(brl->w->tdb, kbuf);
719 return status;
723 static const struct brlock_ops brlock_tdb_ops = {
724 .brl_init = brl_tdb_init,
725 .brl_create_handle = brl_tdb_create_handle,
726 .brl_lock = brl_tdb_lock,
727 .brl_unlock = brl_tdb_unlock,
728 .brl_remove_pending = brl_tdb_remove_pending,
729 .brl_locktest = brl_tdb_locktest,
730 .brl_close = brl_tdb_close
734 void brl_tdb_init_ops(void)
736 brl_set_ops(&brlock_tdb_ops);