r13121: Tag 4.0.0TP1
[Samba.git] / source / ntvfs / common / brlock.c
blob3fc3c09316a6d39e8ddb225ae020e9a3aa2fd3f3
1 /*
2 Unix SMB/CIFS implementation.
4 generic byte range locking code
6 Copyright (C) Andrew Tridgell 1992-2004
7 Copyright (C) Jeremy Allison 1992-2000
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2 of the License, or
12 (at your option) any later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24 /* This module implements a tdb based byte range locking service,
25 replacing the fcntl() based byte range locking previously
26 used. This allows us to provide the same semantics as NT */
28 #include "includes.h"
29 #include "system/filesys.h"
30 #include "lib/tdb/include/tdb.h"
31 #include "messaging/messaging.h"
32 #include "db_wrap.h"
33 #include "lib/messaging/irpc.h"
/*
  in this module a "DATA_BLOB *file_key" is a blob that uniquely identifies
  a file. For a local posix filesystem this will usually be a combination
  of the device and inode numbers of the file, but it can be anything
  that uniquely identifies a file for locking purposes, as long
  as it is applied consistently.
*/
/*
  The lock context holds the elements that decide whether one lock
  has the same owner as another lock (compared by brl_same_context()).

  These structures are stored inside tdb records, so the field order
  and types must not change.
*/
struct lock_context {
	uint32_t server;	/* copied from brl_context.server; also the
				   destination used for MSG_BRL_RETRY messages */
	uint16_t smbpid;	/* the smbpid the caller passed when locking */
	uint16_t tid;		/* copied from brl_context.tid */
};
53 /* The data in brlock records is an unsorted linear array of these
54 records. It is unnecessary to store the count as tdb provides the
55 size of the record */
56 struct lock_struct {
57 struct lock_context context;
58 uint64_t start;
59 uint64_t size;
60 uint16_t fnum;
61 enum brl_type lock_type;
62 void *notify_ptr;
65 struct brl_context {
66 struct tdb_wrap *w;
67 uint32_t server;
68 uint16_t tid;
69 struct messaging_context *messaging_ctx;
70 struct lock_struct last_lock;
75 Open up the brlock.tdb database. Close it down using
76 talloc_free(). We need the messaging_ctx to allow for
77 pending lock notifications.
79 struct brl_context *brl_init(TALLOC_CTX *mem_ctx, uint32_t server, uint16_t tid,
80 struct messaging_context *messaging_ctx)
82 char *path;
83 struct brl_context *brl;
85 brl = talloc(mem_ctx, struct brl_context);
86 if (brl == NULL) {
87 return NULL;
90 path = smbd_tmp_path(brl, "brlock.tdb");
91 brl->w = tdb_wrap_open(brl, path, 0,
92 TDB_DEFAULT, O_RDWR|O_CREAT, 0600);
93 talloc_free(path);
94 if (brl->w == NULL) {
95 talloc_free(brl);
96 return NULL;
99 brl->server = server;
100 brl->tid = tid;
101 brl->messaging_ctx = messaging_ctx;
102 ZERO_STRUCT(brl->last_lock);
104 return brl;
109 see if two locking contexts are equal
111 static BOOL brl_same_context(struct lock_context *ctx1, struct lock_context *ctx2)
113 return (ctx1->server == ctx2->server &&
114 ctx1->smbpid == ctx2->smbpid &&
115 ctx1->tid == ctx2->tid);
119 see if lck1 and lck2 overlap
121 static BOOL brl_overlap(struct lock_struct *lck1,
122 struct lock_struct *lck2)
124 /* this extra check is not redundent - it copes with locks
125 that go beyond the end of 64 bit file space */
126 if (lck1->size != 0 &&
127 lck1->start == lck2->start &&
128 lck1->size == lck2->size) {
129 return True;
132 if (lck1->start >= (lck2->start+lck2->size) ||
133 lck2->start >= (lck1->start+lck1->size)) {
134 return False;
136 return True;
140 See if lock2 can be added when lock1 is in place.
142 static BOOL brl_conflict(struct lock_struct *lck1,
143 struct lock_struct *lck2)
145 /* pending locks don't conflict with anything */
146 if (lck1->lock_type >= PENDING_READ_LOCK ||
147 lck2->lock_type >= PENDING_READ_LOCK) {
148 return False;
151 if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK) {
152 return False;
155 if (brl_same_context(&lck1->context, &lck2->context) &&
156 lck2->lock_type == READ_LOCK && lck1->fnum == lck2->fnum) {
157 return False;
160 return brl_overlap(lck1, lck2);
165 Check to see if this lock conflicts, but ignore our own locks on the
166 same fnum only.
168 static BOOL brl_conflict_other(struct lock_struct *lck1, struct lock_struct *lck2)
170 /* pending locks don't conflict with anything */
171 if (lck1->lock_type >= PENDING_READ_LOCK ||
172 lck2->lock_type >= PENDING_READ_LOCK) {
173 return False;
176 if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK)
177 return False;
180 * note that incoming write calls conflict with existing READ
181 * locks even if the context is the same. JRA. See LOCKTEST7
182 * in smbtorture.
184 if (brl_same_context(&lck1->context, &lck2->context) &&
185 lck1->fnum == lck2->fnum &&
186 (lck2->lock_type == READ_LOCK || lck1->lock_type == WRITE_LOCK)) {
187 return False;
190 return brl_overlap(lck1, lck2);
195 amazingly enough, w2k3 "remembers" whether the last lock failure
196 is the same as this one and changes its error code. I wonder if any
197 app depends on this?
199 static NTSTATUS brl_lock_failed(struct brl_context *brl, struct lock_struct *lock)
201 if (lock->context.server == brl->last_lock.context.server &&
202 lock->context.tid == brl->last_lock.context.tid &&
203 lock->fnum == brl->last_lock.fnum &&
204 lock->start == brl->last_lock.start &&
205 lock->size == brl->last_lock.size) {
206 return NT_STATUS_FILE_LOCK_CONFLICT;
208 brl->last_lock = *lock;
209 if (lock->start >= 0xEF000000 &&
210 (lock->start >> 63) == 0) {
211 /* amazing the little things you learn with a test
212 suite. Locks beyond this offset (as a 64 bit
213 number!) always generate the conflict error code,
214 unless the top bit is set */
215 return NT_STATUS_FILE_LOCK_CONFLICT;
217 return NT_STATUS_LOCK_NOT_GRANTED;
221 Lock a range of bytes. The lock_type can be a PENDING_*_LOCK, in
222 which case a real lock is first tried, and if that fails then a
223 pending lock is created. When the pending lock is triggered (by
224 someone else closing an overlapping lock range) a messaging
225 notification is sent, identified by the notify_ptr
227 NTSTATUS brl_lock(struct brl_context *brl,
228 DATA_BLOB *file_key,
229 uint16_t smbpid,
230 uint16_t fnum,
231 uint64_t start, uint64_t size,
232 enum brl_type lock_type,
233 void *notify_ptr)
235 TDB_DATA kbuf, dbuf;
236 int count=0, i;
237 struct lock_struct lock, *locks=NULL;
238 NTSTATUS status;
240 kbuf.dptr = (char *)file_key->data;
241 kbuf.dsize = file_key->length;
243 if (tdb_chainlock(brl->w->tdb, kbuf) != 0) {
244 return NT_STATUS_INTERNAL_DB_CORRUPTION;
247 /* if this is a pending lock, then with the chainlock held we
248 try to get the real lock. If we succeed then we don't need
249 to make it pending. This prevents a possible race condition
250 where the pending lock gets created after the lock that is
251 preventing the real lock gets removed */
252 if (lock_type >= PENDING_READ_LOCK) {
253 enum brl_type rw = (lock_type==PENDING_READ_LOCK? READ_LOCK : WRITE_LOCK);
254 status = brl_lock(brl, file_key, smbpid, fnum, start, size, rw, NULL);
255 if (NT_STATUS_IS_OK(status)) {
256 tdb_chainunlock(brl->w->tdb, kbuf);
257 return NT_STATUS_OK;
261 dbuf = tdb_fetch(brl->w->tdb, kbuf);
263 lock.context.smbpid = smbpid;
264 lock.context.server = brl->server;
265 lock.context.tid = brl->tid;
266 lock.start = start;
267 lock.size = size;
268 lock.fnum = fnum;
269 lock.lock_type = lock_type;
270 lock.notify_ptr = notify_ptr;
272 if (dbuf.dptr) {
273 /* there are existing locks - make sure they don't conflict */
274 locks = (struct lock_struct *)dbuf.dptr;
275 count = dbuf.dsize / sizeof(*locks);
276 for (i=0; i<count; i++) {
277 if (brl_conflict(&locks[i], &lock)) {
278 status = brl_lock_failed(brl, &lock);
279 goto fail;
284 /* no conflicts - add it to the list of locks */
285 locks = realloc_p(locks, struct lock_struct, count+1);
286 if (!locks) {
287 status = NT_STATUS_NO_MEMORY;
288 goto fail;
289 } else {
290 dbuf.dptr = (char *)locks;
292 locks[count] = lock;
293 dbuf.dsize += sizeof(lock);
295 if (tdb_store(brl->w->tdb, kbuf, dbuf, TDB_REPLACE) != 0) {
296 status = NT_STATUS_INTERNAL_DB_CORRUPTION;
297 goto fail;
300 free(dbuf.dptr);
301 tdb_chainunlock(brl->w->tdb, kbuf);
303 /* the caller needs to know if the real lock was granted. If
304 we have reached here then it must be a pending lock that
305 was granted, so tell them the lock failed */
306 if (lock_type >= PENDING_READ_LOCK) {
307 return brl_lock_failed(brl, &lock);
310 return NT_STATUS_OK;
312 fail:
314 free(dbuf.dptr);
315 tdb_chainunlock(brl->w->tdb, kbuf);
316 return status;
321 we are removing a lock that might be holding up a pending lock. Scan for pending
322 locks that cover this range and if we find any then notify the server that it should
323 retry the lock
325 static void brl_notify_unlock(struct brl_context *brl,
326 struct lock_struct *locks, int count,
327 struct lock_struct *removed_lock)
329 int i, last_notice;
331 /* the last_notice logic is to prevent stampeding on a lock
332 range. It prevents us sending hundreds of notifies on the
333 same range of bytes. It doesn't prevent all possible
334 stampedes, but it does prevent the most common problem */
335 last_notice = -1;
337 for (i=0;i<count;i++) {
338 if (locks[i].lock_type >= PENDING_READ_LOCK &&
339 brl_overlap(&locks[i], removed_lock)) {
340 if (last_notice != -1 && brl_overlap(&locks[i], &locks[last_notice])) {
341 continue;
343 if (locks[i].lock_type == PENDING_WRITE_LOCK) {
344 last_notice = i;
346 messaging_send_ptr(brl->messaging_ctx, locks[i].context.server,
347 MSG_BRL_RETRY, locks[i].notify_ptr);
354 send notifications for all pending locks - the file is being closed by this
355 user
357 static void brl_notify_all(struct brl_context *brl,
358 struct lock_struct *locks, int count)
360 int i;
361 for (i=0;i<count;i++) {
362 if (locks->lock_type >= PENDING_READ_LOCK) {
363 brl_notify_unlock(brl, locks, count, &locks[i]);
371 Unlock a range of bytes.
373 NTSTATUS brl_unlock(struct brl_context *brl,
374 DATA_BLOB *file_key,
375 uint16_t smbpid,
376 uint16_t fnum,
377 uint64_t start, uint64_t size)
379 TDB_DATA kbuf, dbuf;
380 int count, i;
381 struct lock_struct *locks;
382 struct lock_context context;
383 NTSTATUS status;
385 kbuf.dptr = (char *)file_key->data;
386 kbuf.dsize = file_key->length;
388 if (tdb_chainlock(brl->w->tdb, kbuf) != 0) {
389 return NT_STATUS_INTERNAL_DB_CORRUPTION;
392 dbuf = tdb_fetch(brl->w->tdb, kbuf);
393 if (!dbuf.dptr) {
394 tdb_chainunlock(brl->w->tdb, kbuf);
395 return NT_STATUS_RANGE_NOT_LOCKED;
398 context.smbpid = smbpid;
399 context.server = brl->server;
400 context.tid = brl->tid;
402 /* there are existing locks - find a match */
403 locks = (struct lock_struct *)dbuf.dptr;
404 count = dbuf.dsize / sizeof(*locks);
406 for (i=0; i<count; i++) {
407 struct lock_struct *lock = &locks[i];
409 if (brl_same_context(&lock->context, &context) &&
410 lock->fnum == fnum &&
411 lock->start == start &&
412 lock->size == size &&
413 lock->notify_ptr == NULL) {
414 /* found it - delete it */
415 if (count == 1) {
416 if (tdb_delete(brl->w->tdb, kbuf) != 0) {
417 status = NT_STATUS_INTERNAL_DB_CORRUPTION;
418 goto fail;
420 } else {
421 struct lock_struct removed_lock = *lock;
422 if (i < count-1) {
423 memmove(&locks[i], &locks[i+1],
424 sizeof(*locks)*((count-1) - i));
426 count--;
428 /* send notifications for any relevant pending locks */
429 brl_notify_unlock(brl, locks, count, &removed_lock);
431 dbuf.dsize = count * sizeof(*locks);
433 if (tdb_store(brl->w->tdb, kbuf, dbuf, TDB_REPLACE) != 0) {
434 status = NT_STATUS_INTERNAL_DB_CORRUPTION;
435 goto fail;
439 free(dbuf.dptr);
440 tdb_chainunlock(brl->w->tdb, kbuf);
441 return NT_STATUS_OK;
445 /* we didn't find it */
446 status = NT_STATUS_RANGE_NOT_LOCKED;
448 fail:
449 free(dbuf.dptr);
450 tdb_chainunlock(brl->w->tdb, kbuf);
451 return status;
456 remove a pending lock. This is called when the caller has either
457 given up trying to establish a lock or when they have succeeded in
458 getting it. In either case they no longer need to be notified.
460 NTSTATUS brl_remove_pending(struct brl_context *brl,
461 DATA_BLOB *file_key,
462 void *notify_ptr)
464 TDB_DATA kbuf, dbuf;
465 int count, i;
466 struct lock_struct *locks;
467 NTSTATUS status;
469 kbuf.dptr = (char *)file_key->data;
470 kbuf.dsize = file_key->length;
472 if (tdb_chainlock(brl->w->tdb, kbuf) != 0) {
473 return NT_STATUS_INTERNAL_DB_CORRUPTION;
476 dbuf = tdb_fetch(brl->w->tdb, kbuf);
477 if (!dbuf.dptr) {
478 tdb_chainunlock(brl->w->tdb, kbuf);
479 return NT_STATUS_RANGE_NOT_LOCKED;
482 /* there are existing locks - find a match */
483 locks = (struct lock_struct *)dbuf.dptr;
484 count = dbuf.dsize / sizeof(*locks);
486 for (i=0; i<count; i++) {
487 struct lock_struct *lock = &locks[i];
489 if (lock->notify_ptr == notify_ptr &&
490 lock->context.server == brl->server) {
491 /* found it - delete it */
492 if (count == 1) {
493 if (tdb_delete(brl->w->tdb, kbuf) != 0) {
494 status = NT_STATUS_INTERNAL_DB_CORRUPTION;
495 goto fail;
497 } else {
498 if (i < count-1) {
499 memmove(&locks[i], &locks[i+1],
500 sizeof(*locks)*((count-1) - i));
502 count--;
503 dbuf.dsize = count * sizeof(*locks);
504 if (tdb_store(brl->w->tdb, kbuf, dbuf, TDB_REPLACE) != 0) {
505 status = NT_STATUS_INTERNAL_DB_CORRUPTION;
506 goto fail;
510 free(dbuf.dptr);
511 tdb_chainunlock(brl->w->tdb, kbuf);
512 return NT_STATUS_OK;
516 /* we didn't find it */
517 status = NT_STATUS_RANGE_NOT_LOCKED;
519 fail:
520 free(dbuf.dptr);
521 tdb_chainunlock(brl->w->tdb, kbuf);
522 return status;
527 Test if we are allowed to perform IO on a region of an open file
529 NTSTATUS brl_locktest(struct brl_context *brl,
530 DATA_BLOB *file_key,
531 uint16_t fnum,
532 uint16_t smbpid,
533 uint64_t start, uint64_t size,
534 enum brl_type lock_type)
536 TDB_DATA kbuf, dbuf;
537 int count, i;
538 struct lock_struct lock, *locks;
540 kbuf.dptr = (char *)file_key->data;
541 kbuf.dsize = file_key->length;
543 dbuf = tdb_fetch(brl->w->tdb, kbuf);
544 if (dbuf.dptr == NULL) {
545 return NT_STATUS_OK;
548 lock.context.smbpid = smbpid;
549 lock.context.server = brl->server;
550 lock.context.tid = brl->tid;
551 lock.start = start;
552 lock.size = size;
553 lock.fnum = fnum;
554 lock.lock_type = lock_type;
556 /* there are existing locks - make sure they don't conflict */
557 locks = (struct lock_struct *)dbuf.dptr;
558 count = dbuf.dsize / sizeof(*locks);
560 for (i=0; i<count; i++) {
561 if (brl_conflict_other(&locks[i], &lock)) {
562 free(dbuf.dptr);
563 return NT_STATUS_FILE_LOCK_CONFLICT;
567 free(dbuf.dptr);
568 return NT_STATUS_OK;
573 Remove any locks associated with a open file.
575 NTSTATUS brl_close(struct brl_context *brl,
576 DATA_BLOB *file_key, int fnum)
578 TDB_DATA kbuf, dbuf;
579 int count, i, dcount=0;
580 struct lock_struct *locks;
581 NTSTATUS status;
583 kbuf.dptr = (char *)file_key->data;
584 kbuf.dsize = file_key->length;
586 if (tdb_chainlock(brl->w->tdb, kbuf) != 0) {
587 return NT_STATUS_INTERNAL_DB_CORRUPTION;
590 dbuf = tdb_fetch(brl->w->tdb, kbuf);
591 if (!dbuf.dptr) {
592 tdb_chainunlock(brl->w->tdb, kbuf);
593 return NT_STATUS_OK;
596 /* there are existing locks - remove any for this fnum */
597 locks = (struct lock_struct *)dbuf.dptr;
598 count = dbuf.dsize / sizeof(*locks);
600 for (i=0; i<count; i++) {
601 struct lock_struct *lock = &locks[i];
603 if (lock->context.tid == brl->tid &&
604 lock->context.server == brl->server &&
605 lock->fnum == fnum) {
606 /* found it - delete it */
607 if (count > 1 && i < count-1) {
608 memmove(&locks[i], &locks[i+1],
609 sizeof(*locks)*((count-1) - i));
611 count--;
612 i--;
613 dcount++;
617 status = NT_STATUS_OK;
619 if (count == 0) {
620 if (tdb_delete(brl->w->tdb, kbuf) != 0) {
621 status = NT_STATUS_INTERNAL_DB_CORRUPTION;
623 } else if (dcount != 0) {
624 /* tell all pending lock holders for this file that
625 they have a chance now. This is a bit indiscriminant,
626 but works OK */
627 brl_notify_all(brl, locks, count);
629 dbuf.dsize = count * sizeof(*locks);
631 if (tdb_store(brl->w->tdb, kbuf, dbuf, TDB_REPLACE) != 0) {
632 status = NT_STATUS_INTERNAL_DB_CORRUPTION;
636 free(dbuf.dptr);
637 tdb_chainunlock(brl->w->tdb, kbuf);
639 return status;