source3/locking/brlock.c
1 /*
2 Unix SMB/CIFS implementation.
3 byte range locking code
4 Updated to handle range splits/merges.
6 Copyright (C) Andrew Tridgell 1992-2000
7 Copyright (C) Jeremy Allison 1992-2000
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3 of the License, or
12 (at your option) any later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program. If not, see <http://www.gnu.org/licenses/>.
23 /* This module implements a tdb based byte range locking service,
24 replacing the fcntl() based byte range locking previously
25 used. This allows us to provide the same semantics as NT */
27 #include "includes.h"
28 #include "system/filesys.h"
29 #include "locking/proto.h"
30 #include "smbd/globals.h"
31 #include "dbwrap/dbwrap.h"
32 #include "dbwrap/dbwrap_open.h"
33 #include "serverid.h"
34 #include "messages.h"
35 #include "util_tdb.h"
37 #undef DBGC_CLASS
38 #define DBGC_CLASS DBGC_LOCKING
40 #define ZERO_ZERO 0
42 /* The open brlock.tdb database. */
44 static struct db_context *brlock_db;
46 struct byte_range_lock {
47 struct files_struct *fsp;
48 unsigned int num_locks;
49 bool modified;
50 bool have_read_oplocks;
51 struct lock_struct *lock_data;
52 struct db_record *record;
55 /****************************************************************************
56 Debug info at level 10 for lock struct.
57 ****************************************************************************/
59 static void print_lock_struct(unsigned int i, const struct lock_struct *pls)
61 DEBUG(10,("[%u]: smblctx = %llu, tid = %u, pid = %s, ",
63 (unsigned long long)pls->context.smblctx,
64 (unsigned int)pls->context.tid,
65 server_id_str(talloc_tos(), &pls->context.pid) ));
67 DEBUG(10, ("start = %ju, size = %ju, fnum = %ju, %s %s\n",
68 (uintmax_t)pls->start,
69 (uintmax_t)pls->size,
70 (uintmax_t)pls->fnum,
71 lock_type_name(pls->lock_type),
72 lock_flav_name(pls->lock_flav)));
75 unsigned int brl_num_locks(const struct byte_range_lock *brl)
77 return brl->num_locks;
80 struct files_struct *brl_fsp(struct byte_range_lock *brl)
82 return brl->fsp;
85 bool brl_have_read_oplocks(const struct byte_range_lock *brl)
87 return brl->have_read_oplocks;
90 void brl_set_have_read_oplocks(struct byte_range_lock *brl,
91 bool have_read_oplocks)
93 DEBUG(10, ("Setting have_read_oplocks to %s\n",
94 have_read_oplocks ? "true" : "false"));
95 SMB_ASSERT(brl->record != NULL); /* otherwise we're readonly */
96 brl->have_read_oplocks = have_read_oplocks;
97 brl->modified = true;
100 /****************************************************************************
101 See if two locking contexts are equal.
102 ****************************************************************************/
104 static bool brl_same_context(const struct lock_context *ctx1,
105 const struct lock_context *ctx2)
107 return (serverid_equal(&ctx1->pid, &ctx2->pid) &&
108 (ctx1->smblctx == ctx2->smblctx) &&
109 (ctx1->tid == ctx2->tid));
112 /****************************************************************************
113 See if lck1 and lck2 overlap.
114 ****************************************************************************/
116 static bool brl_overlap(const struct lock_struct *lck1,
117 const struct lock_struct *lck2)
119 /* XXX Remove for Win7 compatibility. */
120 /* this extra check is not redundant - it copes with locks
121 that go beyond the end of 64 bit file space */
122 if (lck1->size != 0 &&
123 lck1->start == lck2->start &&
124 lck1->size == lck2->size) {
125 return True;
128 if (lck1->start >= (lck2->start+lck2->size) ||
129 lck2->start >= (lck1->start+lck1->size)) {
130 return False;
132 return True;
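/* Editor's illustration, not part of the upstream file: a minimal
   self-check of the overlap rule above. Ranges are half-open, so
   [0,10) and [10,15) merely touch and do not overlap. */
#if 0
static void brl_overlap_example(void)
{
        struct lock_struct a = { .start = 0, .size = 10 };
        struct lock_struct b = { .start = 10, .size = 5 };

        SMB_ASSERT(!brl_overlap(&a, &b));       /* adjacent, no overlap */

        b.start = 9;                            /* now share byte 9 */
        SMB_ASSERT(brl_overlap(&a, &b));
}
#endif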
135 /****************************************************************************
136 See if lock2 can be added when lock1 is in place.
137 ****************************************************************************/
139 static bool brl_conflict(const struct lock_struct *lck1,
140 const struct lock_struct *lck2)
142 /* Ignore PENDING locks. */
143 if (IS_PENDING_LOCK(lck1->lock_type) || IS_PENDING_LOCK(lck2->lock_type))
144 return False;
146 /* Read locks never conflict. */
147 if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK) {
148 return False;
151 /* A READ lock can stack on top of a WRITE lock if they have the same
152 * context & fnum. */
153 if (lck1->lock_type == WRITE_LOCK && lck2->lock_type == READ_LOCK &&
154 brl_same_context(&lck1->context, &lck2->context) &&
155 lck1->fnum == lck2->fnum) {
156 return False;
159 return brl_overlap(lck1, lck2);
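/* Editor's illustration, not part of the upstream file: the stacking
   rule above in action. A holder may stack a READ lock on its own
   WRITE lock on the same fnum; all field values here are invented. */
#if 0
static void brl_conflict_stack_example(void)
{
        struct lock_struct w = { .start = 0, .size = 8,
                                 .fnum = 1, .lock_type = WRITE_LOCK };
        struct lock_struct r = w;

        r.lock_type = READ_LOCK;
        SMB_ASSERT(!brl_conflict(&w, &r));      /* same context & fnum */

        r.fnum = 2;                             /* different handle */
        SMB_ASSERT(brl_conflict(&w, &r));
}
#endif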
162 /****************************************************************************
163 See if lock2 can be added when lock1 is in place - when both locks are POSIX
164 flavour. POSIX locks ignore fnum - they only care about dev/ino which we
165 know already match.
166 ****************************************************************************/
168 static bool brl_conflict_posix(const struct lock_struct *lck1,
169 const struct lock_struct *lck2)
171 #if defined(DEVELOPER)
172 SMB_ASSERT(lck1->lock_flav == POSIX_LOCK);
173 SMB_ASSERT(lck2->lock_flav == POSIX_LOCK);
174 #endif
176 /* Ignore PENDING locks. */
177 if (IS_PENDING_LOCK(lck1->lock_type) || IS_PENDING_LOCK(lck2->lock_type))
178 return False;
180 /* Read locks never conflict. */
181 if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK) {
182 return False;
185 /* Locks on the same context don't conflict. Ignore fnum. */
186 if (brl_same_context(&lck1->context, &lck2->context)) {
187 return False;
190 /* One is read, the other write, or the context is different,
191 do they overlap ? */
192 return brl_overlap(lck1, lck2);
195 #if ZERO_ZERO
196 static bool brl_conflict1(const struct lock_struct *lck1,
197 const struct lock_struct *lck2)
199 if (IS_PENDING_LOCK(lck1->lock_type) || IS_PENDING_LOCK(lck2->lock_type))
200 return False;
202 if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK) {
203 return False;
206 if (brl_same_context(&lck1->context, &lck2->context) &&
207 lck2->lock_type == READ_LOCK && lck1->fnum == lck2->fnum) {
208 return False;
211 if (lck2->start == 0 && lck2->size == 0 && lck1->size != 0) {
212 return True;
215 if (lck1->start >= (lck2->start + lck2->size) ||
216 lck2->start >= (lck1->start + lck1->size)) {
217 return False;
220 return True;
222 #endif
224 /****************************************************************************
225 Check to see if this lock conflicts, but ignore our own locks on the
226 same fnum only. This is the read/write lock check code path.
227 This is never used in the POSIX lock case.
228 ****************************************************************************/
230 static bool brl_conflict_other(const struct lock_struct *lock,
231 const struct lock_struct *rw_probe)
233 if (IS_PENDING_LOCK(lock->lock_type) ||
234 IS_PENDING_LOCK(rw_probe->lock_type)) {
235 return False;
238 if (lock->lock_type == READ_LOCK && rw_probe->lock_type == READ_LOCK) {
239 return False;
242 if (lock->lock_flav == POSIX_LOCK &&
243 rw_probe->lock_flav == POSIX_LOCK) {
245 * POSIX flavour locks never conflict here - this is only called
246 * in the read/write path.
248 return False;
251 if (!brl_overlap(lock, rw_probe)) {
253 * I/O can only conflict when overlapping a lock, thus let it
254 * pass
256 return false;
259 if (!brl_same_context(&lock->context, &rw_probe->context)) {
261 * Different process, conflict
263 return true;
266 if (lock->fnum != rw_probe->fnum) {
268 * Different file handle, conflict
270 return true;
273 if ((lock->lock_type == READ_LOCK) &&
274 (rw_probe->lock_type == WRITE_LOCK)) {
276 * Incoming WRITE locks conflict with existing READ locks even
277 * if the context is the same. JRA. See LOCKTEST7 in
278 * smbtorture.
280 return true;
284 * I/O request compatible with existing lock, let it pass without
285 * conflict
288 return false;
291 /****************************************************************************
292 Check if an unlock overlaps a pending lock.
293 ****************************************************************************/
295 static bool brl_pending_overlap(const struct lock_struct *lock, const struct lock_struct *pend_lock)
297 if ((lock->start <= pend_lock->start) && (lock->start + lock->size > pend_lock->start))
298 return True;
299 if ((lock->start >= pend_lock->start) && (lock->start < pend_lock->start + pend_lock->size))
300 return True;
301 return False;
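/* Editor's illustration, not part of the upstream file: an unlock of
   [0,10) must wake a waiter pending on [5,20) because the two regions
   share bytes 5-9. */
#if 0
static void brl_pending_overlap_example(void)
{
        struct lock_struct unlock_range = { .start = 0, .size = 10 };
        struct lock_struct pending = { .start = 5, .size = 20 };

        SMB_ASSERT(brl_pending_overlap(&unlock_range, &pending));
}
#endif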
304 /****************************************************************************
305 Amazingly enough, w2k3 "remembers" whether the last lock failure on a fnum
306 is the same as this one and changes its error code. I wonder if any
307 app depends on this ?
308 ****************************************************************************/
310 static NTSTATUS brl_lock_failed(files_struct *fsp,
311 const struct lock_struct *lock,
312 bool blocking_lock)
314 if (lock->start >= 0xEF000000 && (lock->start >> 63) == 0) {
315 /* amazing the little things you learn with a test
316 suite. Locks beyond this offset (as a 64 bit
317 number!) always generate the conflict error code,
318 unless the top bit is set */
319 if (!blocking_lock) {
320 fsp->last_lock_failure = *lock;
322 return NT_STATUS_FILE_LOCK_CONFLICT;
325 if (serverid_equal(&lock->context.pid, &fsp->last_lock_failure.context.pid) &&
326 lock->context.tid == fsp->last_lock_failure.context.tid &&
327 lock->fnum == fsp->last_lock_failure.fnum &&
328 lock->start == fsp->last_lock_failure.start) {
329 return NT_STATUS_FILE_LOCK_CONFLICT;
332 if (!blocking_lock) {
333 fsp->last_lock_failure = *lock;
335 return NT_STATUS_LOCK_NOT_GRANTED;
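/* Editor's illustration, not part of the upstream file: beyond the
   magic 0xEF000000 offset (top bit clear) the failure is always
   reported as a conflict, regardless of the last-failure cache. */
#if 0
static void brl_lock_failed_example(files_struct *fsp)
{
        struct lock_struct l = { .start = 0xEF000000ULL, .size = 1 };

        SMB_ASSERT(NT_STATUS_EQUAL(brl_lock_failed(fsp, &l, false),
                                   NT_STATUS_FILE_LOCK_CONFLICT));
}
#endif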
338 /****************************************************************************
339 Open up the brlock.tdb database.
340 ****************************************************************************/
342 void brl_init(bool read_only)
344 int tdb_flags;
346 if (brlock_db) {
347 return;
350 tdb_flags = TDB_DEFAULT|TDB_VOLATILE|TDB_CLEAR_IF_FIRST|TDB_INCOMPATIBLE_HASH;
352 if (!lp_clustering()) {
354 * We can't use the SEQNUM trick to cache brlock
355 * entries in the clustering case because ctdb seqnum
356 * propagation has a delay.
358 tdb_flags |= TDB_SEQNUM;
361 brlock_db = db_open(NULL, lock_path("brlock.tdb"),
362 SMB_OPEN_DATABASE_TDB_HASH_SIZE, tdb_flags,
363 read_only?O_RDONLY:(O_RDWR|O_CREAT), 0644,
364 DBWRAP_LOCK_ORDER_2, DBWRAP_FLAG_NONE);
365 if (!brlock_db) {
366 DEBUG(0,("Failed to open byte range locking database %s\n",
367 lock_path("brlock.tdb")));
368 return;
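/* Editor's sketch, not part of the upstream file: typical lifetime of
   the database handle. The server calls brl_init() once before any
   byte range operation; brl_shutdown() below releases the context. */
#if 0
static void brl_db_lifetime_example(void)
{
        brl_init(false);        /* open brlock.tdb read-write */
        brl_shutdown();         /* TALLOC_FREE()s brlock_db */
}
#endif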
372 /****************************************************************************
373 Close down the brlock.tdb database.
374 ****************************************************************************/
376 void brl_shutdown(void)
378 TALLOC_FREE(brlock_db);
381 #if ZERO_ZERO
382 /****************************************************************************
383 Compare two locks for sorting.
384 ****************************************************************************/
386 static int lock_compare(const struct lock_struct *lck1,
387 const struct lock_struct *lck2)
389 if (lck1->start != lck2->start) {
390 return (lck1->start - lck2->start);
392 if (lck2->size != lck1->size) {
393 return ((int)lck1->size - (int)lck2->size);
395 return 0;
397 #endif
399 /****************************************************************************
400 Lock a range of bytes - Windows lock semantics.
401 ****************************************************************************/
403 NTSTATUS brl_lock_windows_default(struct byte_range_lock *br_lck,
404 struct lock_struct *plock, bool blocking_lock)
406 unsigned int i;
407 files_struct *fsp = br_lck->fsp;
408 struct lock_struct *locks = br_lck->lock_data;
409 NTSTATUS status;
411 SMB_ASSERT(plock->lock_type != UNLOCK_LOCK);
413 if ((plock->start + plock->size - 1 < plock->start) &&
414 plock->size != 0) {
415 return NT_STATUS_INVALID_LOCK_RANGE;
418 for (i=0; i < br_lck->num_locks; i++) {
419 /* Do any Windows or POSIX locks conflict ? */
420 if (brl_conflict(&locks[i], plock)) {
421 if (!serverid_exists(&locks[i].context.pid)) {
422 locks[i].context.pid.pid = 0;
423 br_lck->modified = true;
424 continue;
426 /* Remember who blocked us. */
427 plock->context.smblctx = locks[i].context.smblctx;
428 return brl_lock_failed(fsp,plock,blocking_lock);
430 #if ZERO_ZERO
431 if (plock->start == 0 && plock->size == 0 &&
432 locks[i].size == 0) {
433 break;
435 #endif
438 if (!IS_PENDING_LOCK(plock->lock_type)) {
439 contend_level2_oplocks_begin(fsp, LEVEL2_CONTEND_WINDOWS_BRL);
442 /* We can get the Windows lock, now see if it needs to
443 be mapped into a lower level POSIX one, and if so can
444 we get it ? */
446 if (!IS_PENDING_LOCK(plock->lock_type) && lp_posix_locking(fsp->conn->params)) {
447 int errno_ret;
448 if (!set_posix_lock_windows_flavour(fsp,
449 plock->start,
450 plock->size,
451 plock->lock_type,
452 &plock->context,
453 locks,
454 br_lck->num_locks,
455 &errno_ret)) {
457 /* We don't know who blocked us. */
458 plock->context.smblctx = 0xFFFFFFFFFFFFFFFFLL;
460 if (errno_ret == EACCES || errno_ret == EAGAIN) {
461 status = NT_STATUS_FILE_LOCK_CONFLICT;
462 goto fail;
463 } else {
464 status = map_nt_error_from_unix(errno);
465 goto fail;
470 /* no conflicts - add it to the list of locks */
471 locks = talloc_realloc(br_lck, locks, struct lock_struct,
472 (br_lck->num_locks + 1));
473 if (!locks) {
474 status = NT_STATUS_NO_MEMORY;
475 goto fail;
478 memcpy(&locks[br_lck->num_locks], plock, sizeof(struct lock_struct));
479 br_lck->num_locks += 1;
480 br_lck->lock_data = locks;
481 br_lck->modified = True;
483 return NT_STATUS_OK;
484 fail:
485 if (!IS_PENDING_LOCK(plock->lock_type)) {
486 contend_level2_oplocks_end(fsp, LEVEL2_CONTEND_WINDOWS_BRL);
488 return status;
491 /****************************************************************************
492 Cope with POSIX range splits and merges.
493 ****************************************************************************/
495 static unsigned int brlock_posix_split_merge(struct lock_struct *lck_arr, /* Output array. */
496 struct lock_struct *ex, /* existing lock. */
497 struct lock_struct *plock) /* proposed lock. */
499 bool lock_types_differ = (ex->lock_type != plock->lock_type);
501 /* We can't merge non-conflicting locks on different context - ignore fnum. */
503 if (!brl_same_context(&ex->context, &plock->context)) {
504 /* Just copy. */
505 memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
506 return 1;
509 /* We now know we have the same context. */
511 /* Did we overlap ? */
513 /*********************************************
514                                         +---------+
515                                         |   ex    |
516                                         +---------+
517                          +-------+
518                          | plock |
519                          +-------+
520 OR....
521    +---------+
522    |   ex    |
523    +---------+
524 **********************************************/
526 if ( (ex->start > (plock->start + plock->size)) ||
527 (plock->start > (ex->start + ex->size))) {
529 /* No overlap with this lock - copy existing. */
531 memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
532 return 1;
535 /*********************************************
536 +---------------------------+
537 |          ex               |
538 +---------------------------+
539 +---------------------------+
540 |       plock               | -> replace with plock.
541 +---------------------------+
543       +---------------+
544       |      ex       |
545       +---------------+
546 +---------------------------+
547 |       plock               | -> replace with plock.
548 +---------------------------+
550 **********************************************/
552 if ( (ex->start >= plock->start) &&
553 (ex->start + ex->size <= plock->start + plock->size) ) {
555 /* Replace - discard existing lock. */
557 return 0;
560 /*********************************************
561 Adjacent after.
562                 +-------+
563                 |  ex   |
564                 +-------+
565 +---------------+
566 |   plock       |
567 +---------------+
569 BECOMES....
570 +---------------+-------+
571 |   plock       | ex    | - different lock types.
572 +---------------+-------+
573 OR.... (merge)
574 +-----------------------+
575 |   plock               | - same lock type.
576 +-----------------------+
577 **********************************************/
579 if (plock->start + plock->size == ex->start) {
581 /* If the lock types are the same, we merge, if different, we
582 add the remainder of the old lock. */
584 if (lock_types_differ) {
585 /* Add existing. */
586 memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
587 return 1;
588 } else {
589 /* Merge - adjust incoming lock as we may have more
590 * merging to come. */
591 plock->size += ex->size;
592 return 0;
596 /*********************************************
597 Adjacent before.
598 +-------+
599 |  ex   |
600 +-------+
601         +---------------+
602         |   plock       |
603         +---------------+
604 BECOMES....
605 +-------+---------------+
606 |  ex   |   plock       | - different lock types
607 +-------+---------------+
609 OR.... (merge)
610 +-----------------------+
611 |   plock               | - same lock type.
612 +-----------------------+
614 **********************************************/
616 if (ex->start + ex->size == plock->start) {
618 /* If the lock types are the same, we merge, if different, we
619 add the existing lock. */
621 if (lock_types_differ) {
622 memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
623 return 1;
624 } else {
625 /* Merge - adjust incoming lock as we may have more
626 * merging to come. */
627 plock->start = ex->start;
628 plock->size += ex->size;
629 return 0;
633 /*********************************************
634 Overlap after.
635 +-----------------------+
636 |          ex           |
637 +-----------------------+
638 +---------------+
639 |   plock       |
640 +---------------+
642    +----------------+
643    |       ex       |
644    +----------------+
645 +---------------+
646 |   plock       |
647 +---------------+
649 BECOMES....
650 +---------------+-------+
651 |   plock       | ex    | - different lock types.
652 +---------------+-------+
653 OR.... (merge)
654 +-----------------------+
655 |   plock               | - same lock type.
656 +-----------------------+
657 **********************************************/
659 if ( (ex->start >= plock->start) &&
660 (ex->start <= plock->start + plock->size) &&
661 (ex->start + ex->size > plock->start + plock->size) ) {
663 /* If the lock types are the same, we merge, if different, we
664 add the remainder of the old lock. */
666 if (lock_types_differ) {
667 /* Add remaining existing. */
668 memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
669 /* Adjust existing start and size. */
670 lck_arr[0].start = plock->start + plock->size;
671 lck_arr[0].size = (ex->start + ex->size) - (plock->start + plock->size);
672 return 1;
673 } else {
674 /* Merge - adjust incoming lock as we may have more
675 * merging to come. */
676 plock->size += (ex->start + ex->size) - (plock->start + plock->size);
677 return 0;
681 /*********************************************
682 Overlap before.
683 +-----------------------+
684 |  ex                   |
685 +-----------------------+
686         +---------------+
687         |   plock       |
688         +---------------+
690 +-------------+
691 |  ex         |
692 +-------------+
693         +---------------+
694         |   plock       |
695         +---------------+
697 BECOMES....
698 +-------+---------------+
699 |  ex   |   plock       | - different lock types
700 +-------+---------------+
702 OR.... (merge)
703 +-----------------------+
704 |   plock               | - same lock type.
705 +-----------------------+
707 **********************************************/
709 if ( (ex->start < plock->start) &&
710 (ex->start + ex->size >= plock->start) &&
711 (ex->start + ex->size <= plock->start + plock->size) ) {
713 /* If the lock types are the same, we merge, if different, we
714 add the truncated old lock. */
716 if (lock_types_differ) {
717 memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
718 /* Adjust existing size. */
719 lck_arr[0].size = plock->start - ex->start;
720 return 1;
721 } else {
722 /* Merge - adjust incoming lock as we may have more
723 * merging to come. MUST ADJUST plock SIZE FIRST ! */
724 plock->size += (plock->start - ex->start);
725 plock->start = ex->start;
726 return 0;
730 /*********************************************
731 Complete overlap.
732 +---------------------------+
733 |        ex                 |
734 +---------------------------+
735         +---------+
736         |  plock  |
737         +---------+
738 BECOMES.....
739 +-------+---------+---------+
740 | ex    |  plock  | ex      | - different lock types.
741 +-------+---------+---------+
743 +---------------------------+
744 |        plock              | - same lock type.
745 +---------------------------+
746 **********************************************/
748 if ( (ex->start < plock->start) && (ex->start + ex->size > plock->start + plock->size) ) {
750 if (lock_types_differ) {
752 /* We have to split ex into two locks here. */
754 memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
755 memcpy(&lck_arr[1], ex, sizeof(struct lock_struct));
757 /* Adjust first existing size. */
758 lck_arr[0].size = plock->start - ex->start;
760 /* Adjust second existing start and size. */
761 lck_arr[1].start = plock->start + plock->size;
762 lck_arr[1].size = (ex->start + ex->size) - (plock->start + plock->size);
763 return 2;
764 } else {
765 /* Just eat the existing locks, merge them into plock. */
766 plock->start = ex->start;
767 plock->size = ex->size;
768 return 0;
772 /* Never get here. */
773 smb_panic("brlock_posix_split_merge");
774 /* Notreached. */
776 /* Keep some compilers happy. */
777 return 0;
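/* Editor's worked example, not part of the upstream file: a POSIX
   write lock over the middle of an existing read lock splits it in
   two, which is why callers reserve two extra array slots. All field
   values are invented; both locks share the same (zeroed) context. */
#if 0
static void split_merge_example(void)
{
        struct lock_struct out[2];
        struct lock_struct ex = { .start = 0, .size = 100,
                                  .lock_type = READ_LOCK,
                                  .lock_flav = POSIX_LOCK };
        struct lock_struct pl = { .start = 40, .size = 20,
                                  .lock_type = WRITE_LOCK,
                                  .lock_flav = POSIX_LOCK };

        /* ex becomes [0,40) and [60,100); plock keeps [40,60). */
        SMB_ASSERT(brlock_posix_split_merge(out, &ex, &pl) == 2);
        SMB_ASSERT(out[0].start == 0 && out[0].size == 40);
        SMB_ASSERT(out[1].start == 60 && out[1].size == 40);
}
#endif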
780 /****************************************************************************
781 Lock a range of bytes - POSIX lock semantics.
782 We must cope with range splits and merges.
783 ****************************************************************************/
785 static NTSTATUS brl_lock_posix(struct messaging_context *msg_ctx,
786 struct byte_range_lock *br_lck,
787 struct lock_struct *plock)
789 unsigned int i, count, posix_count;
790 struct lock_struct *locks = br_lck->lock_data;
791 struct lock_struct *tp;
792 bool signal_pending_read = False;
793 bool break_oplocks = false;
794 NTSTATUS status;
796 /* No zero-zero locks for POSIX. */
797 if (plock->start == 0 && plock->size == 0) {
798 return NT_STATUS_INVALID_PARAMETER;
801 /* Don't allow 64-bit lock wrap. */
802 if (plock->start + plock->size - 1 < plock->start) {
803 return NT_STATUS_INVALID_PARAMETER;
806 /* The worst case scenario here is we have to split an
807 existing POSIX lock range into two, and add our lock,
808 so we need at most 2 more entries. */
810 tp = talloc_array(br_lck, struct lock_struct, br_lck->num_locks + 2);
811 if (!tp) {
812 return NT_STATUS_NO_MEMORY;
815 count = posix_count = 0;
817 for (i=0; i < br_lck->num_locks; i++) {
818 struct lock_struct *curr_lock = &locks[i];
820 /* If we have a pending read lock, a lock downgrade should
821 trigger a lock re-evaluation. */
822 if (curr_lock->lock_type == PENDING_READ_LOCK &&
823 brl_pending_overlap(plock, curr_lock)) {
824 signal_pending_read = True;
827 if (curr_lock->lock_flav == WINDOWS_LOCK) {
828 /* Do any Windows flavour locks conflict ? */
829 if (brl_conflict(curr_lock, plock)) {
830 if (!serverid_exists(&curr_lock->context.pid)) {
831 curr_lock->context.pid.pid = 0;
832 br_lck->modified = true;
833 continue;
835 /* No games with error messages. */
836 TALLOC_FREE(tp);
837 /* Remember who blocked us. */
838 plock->context.smblctx = curr_lock->context.smblctx;
839 return NT_STATUS_FILE_LOCK_CONFLICT;
841 /* Just copy the Windows lock into the new array. */
842 memcpy(&tp[count], curr_lock, sizeof(struct lock_struct));
843 count++;
844 } else {
845 unsigned int tmp_count = 0;
847 /* POSIX conflict semantics are different. */
848 if (brl_conflict_posix(curr_lock, plock)) {
849 if (!serverid_exists(&curr_lock->context.pid)) {
850 curr_lock->context.pid.pid = 0;
851 br_lck->modified = true;
852 continue;
854 /* Can't block ourselves with POSIX locks. */
855 /* No games with error messages. */
856 TALLOC_FREE(tp);
857 /* Remember who blocked us. */
858 plock->context.smblctx = curr_lock->context.smblctx;
859 return NT_STATUS_FILE_LOCK_CONFLICT;
862 /* Work out overlaps. */
863 tmp_count += brlock_posix_split_merge(&tp[count], curr_lock, plock);
864 posix_count += tmp_count;
865 count += tmp_count;
870 * Break oplocks while we hold a brl. Since lock() and unlock() calls
871 are not symmetric with POSIX semantics, we cannot guarantee our
872 * contend_level2_oplocks_begin/end calls will be acquired and
873 * released one-for-one as with Windows semantics. Therefore we only
874 * call contend_level2_oplocks_begin if this is the first POSIX brl on
875 * the file.
877 break_oplocks = (!IS_PENDING_LOCK(plock->lock_type) &&
878 posix_count == 0);
879 if (break_oplocks) {
880 contend_level2_oplocks_begin(br_lck->fsp,
881 LEVEL2_CONTEND_POSIX_BRL);
884 /* Try and add the lock in order, sorted by lock start. */
885 for (i=0; i < count; i++) {
886 struct lock_struct *curr_lock = &tp[i];
888 if (curr_lock->start <= plock->start) {
889 continue;
893 if (i < count) {
894 memmove(&tp[i+1], &tp[i],
895 (count - i)*sizeof(struct lock_struct));
897 memcpy(&tp[i], plock, sizeof(struct lock_struct));
898 count++;
900 /* We can get the POSIX lock, now see if it needs to
901 be mapped into a lower level POSIX one, and if so can
902 we get it ? */
904 if (!IS_PENDING_LOCK(plock->lock_type) && lp_posix_locking(br_lck->fsp->conn->params)) {
905 int errno_ret;
907 /* The lower layer just needs to attempt to
908 get the system POSIX lock. We've weeded out
909 any conflicts above. */
911 if (!set_posix_lock_posix_flavour(br_lck->fsp,
912 plock->start,
913 plock->size,
914 plock->lock_type,
915 &errno_ret)) {
917 /* We don't know who blocked us. */
918 plock->context.smblctx = 0xFFFFFFFFFFFFFFFFLL;
920 if (errno_ret == EACCES || errno_ret == EAGAIN) {
921 TALLOC_FREE(tp);
922 status = NT_STATUS_FILE_LOCK_CONFLICT;
923 goto fail;
924 } else {
925 TALLOC_FREE(tp);
926 status = map_nt_error_from_unix(errno);
927 goto fail;
932 /* If we didn't use all the allocated size,
933 * Realloc so we don't leak entries per lock call. */
934 if (count < br_lck->num_locks + 2) {
935 tp = talloc_realloc(br_lck, tp, struct lock_struct, count);
936 if (!tp) {
937 status = NT_STATUS_NO_MEMORY;
938 goto fail;
942 br_lck->num_locks = count;
943 TALLOC_FREE(br_lck->lock_data);
944 br_lck->lock_data = tp;
945 locks = tp;
946 br_lck->modified = True;
948 /* A successful downgrade from write to read lock can trigger a lock
949 re-evaluation where waiting readers can now proceed. */
951 if (signal_pending_read) {
952 /* Send unlock messages to any pending read waiters that overlap. */
953 for (i=0; i < br_lck->num_locks; i++) {
954 struct lock_struct *pend_lock = &locks[i];
956 /* Ignore non-pending locks. */
957 if (!IS_PENDING_LOCK(pend_lock->lock_type)) {
958 continue;
961 if (pend_lock->lock_type == PENDING_READ_LOCK &&
962 brl_pending_overlap(plock, pend_lock)) {
963 DEBUG(10,("brl_lock_posix: sending unlock message to pid %s\n",
964 procid_str_static(&pend_lock->context.pid )));
966 messaging_send(msg_ctx, pend_lock->context.pid,
967 MSG_SMB_UNLOCK, &data_blob_null);
972 return NT_STATUS_OK;
973 fail:
974 if (break_oplocks) {
975 contend_level2_oplocks_end(br_lck->fsp,
976 LEVEL2_CONTEND_POSIX_BRL);
978 return status;
981 NTSTATUS smb_vfs_call_brl_lock_windows(struct vfs_handle_struct *handle,
982 struct byte_range_lock *br_lck,
983 struct lock_struct *plock,
984 bool blocking_lock)
986 VFS_FIND(brl_lock_windows);
987 return handle->fns->brl_lock_windows_fn(handle, br_lck, plock,
988 blocking_lock);
991 /****************************************************************************
992 Lock a range of bytes.
993 ****************************************************************************/
995 NTSTATUS brl_lock(struct messaging_context *msg_ctx,
996 struct byte_range_lock *br_lck,
997 uint64_t smblctx,
998 struct server_id pid,
999 br_off start,
1000 br_off size,
1001 enum brl_type lock_type,
1002 enum brl_flavour lock_flav,
1003 bool blocking_lock,
1004 uint64_t *psmblctx)
1006 NTSTATUS ret;
1007 struct lock_struct lock;
1009 #if !ZERO_ZERO
1010 if (start == 0 && size == 0) {
1011 DEBUG(0,("client sent 0/0 lock - please report this\n"));
1013 #endif
1015 lock = (struct lock_struct) {
1016 .context.smblctx = smblctx,
1017 .context.pid = pid,
1018 .context.tid = br_lck->fsp->conn->cnum,
1019 .start = start,
1020 .size = size,
1021 .fnum = br_lck->fsp->fnum,
1022 .lock_type = lock_type,
1023 .lock_flav = lock_flav
1026 if (lock_flav == WINDOWS_LOCK) {
1027 ret = SMB_VFS_BRL_LOCK_WINDOWS(br_lck->fsp->conn, br_lck,
1028 &lock, blocking_lock);
1029 } else {
1030 ret = brl_lock_posix(msg_ctx, br_lck, &lock);
1033 #if ZERO_ZERO
1034 /* sort the lock list */
1035 TYPESAFE_QSORT(br_lck->lock_data, (size_t)br_lck->num_locks, lock_compare);
1036 #endif
1038 /* If we're returning an error, return who blocked us. */
1039 if (!NT_STATUS_IS_OK(ret) && psmblctx) {
1040 *psmblctx = lock.context.smblctx;
1042 return ret;
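/* Editor's call sketch, not part of the upstream file: take a Windows
   flavour write lock on bytes [0,4). msg_ctx, br_lck, smblctx and
   self are assumed to exist in the caller. */
#if 0
        uint64_t blocker = 0;
        NTSTATUS status = brl_lock(msg_ctx, br_lck,
                                   smblctx,             /* lock owner id */
                                   self,                /* our server_id */
                                   0,                   /* start */
                                   4,                   /* size */
                                   WRITE_LOCK,
                                   WINDOWS_LOCK,
                                   false,               /* not blocking */
                                   &blocker);           /* who blocked us */
#endif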
1045 static void brl_delete_lock_struct(struct lock_struct *locks,
1046 unsigned num_locks,
1047 unsigned del_idx)
1049 if (del_idx >= num_locks) {
1050 return;
1052 memmove(&locks[del_idx], &locks[del_idx+1],
1053 sizeof(*locks) * (num_locks - del_idx - 1));
1056 /****************************************************************************
1057 Unlock a range of bytes - Windows semantics.
1058 ****************************************************************************/
1060 bool brl_unlock_windows_default(struct messaging_context *msg_ctx,
1061 struct byte_range_lock *br_lck,
1062 const struct lock_struct *plock)
1064 unsigned int i, j;
1065 struct lock_struct *locks = br_lck->lock_data;
1066 enum brl_type deleted_lock_type = READ_LOCK; /* shut the compiler up.... */
1068 SMB_ASSERT(plock->lock_type == UNLOCK_LOCK);
1070 #if ZERO_ZERO
1071 /* Delete write locks by preference... The lock list
1072 is sorted in the zero zero case. */
1074 for (i = 0; i < br_lck->num_locks; i++) {
1075 struct lock_struct *lock = &locks[i];
1077 if (lock->lock_type == WRITE_LOCK &&
1078 brl_same_context(&lock->context, &plock->context) &&
1079 lock->fnum == plock->fnum &&
1080 lock->lock_flav == WINDOWS_LOCK &&
1081 lock->start == plock->start &&
1082 lock->size == plock->size) {
1084 /* found it - delete it */
1085 deleted_lock_type = lock->lock_type;
1086 break;
1090 if (i != br_lck->num_locks) {
1091 /* We found it - don't search again. */
1092 goto unlock_continue;
1094 #endif
1096 for (i = 0; i < br_lck->num_locks; i++) {
1097 struct lock_struct *lock = &locks[i];
1099 if (IS_PENDING_LOCK(lock->lock_type)) {
1100 continue;
1103 /* Only remove our own locks that match in start, size, and flavour. */
1104 if (brl_same_context(&lock->context, &plock->context) &&
1105 lock->fnum == plock->fnum &&
1106 lock->lock_flav == WINDOWS_LOCK &&
1107 lock->start == plock->start &&
1108 lock->size == plock->size ) {
1109 deleted_lock_type = lock->lock_type;
1110 break;
1114 if (i == br_lck->num_locks) {
1115 /* we didn't find it */
1116 return False;
1119 #if ZERO_ZERO
1120 unlock_continue:
1121 #endif
1123 brl_delete_lock_struct(locks, br_lck->num_locks, i);
1124 br_lck->num_locks -= 1;
1125 br_lck->modified = True;
1127 /* Unlock the underlying POSIX regions. */
1128 if(lp_posix_locking(br_lck->fsp->conn->params)) {
1129 release_posix_lock_windows_flavour(br_lck->fsp,
1130 plock->start,
1131 plock->size,
1132 deleted_lock_type,
1133 &plock->context,
1134 locks,
1135 br_lck->num_locks);
1138 /* Send unlock messages to any pending waiters that overlap. */
1139 for (j=0; j < br_lck->num_locks; j++) {
1140 struct lock_struct *pend_lock = &locks[j];
1142 /* Ignore non-pending locks. */
1143 if (!IS_PENDING_LOCK(pend_lock->lock_type)) {
1144 continue;
1147 /* We could send specific lock info here... */
1148 if (brl_pending_overlap(plock, pend_lock)) {
1149 DEBUG(10,("brl_unlock: sending unlock message to pid %s\n",
1150 procid_str_static(&pend_lock->context.pid )));
1152 messaging_send(msg_ctx, pend_lock->context.pid,
1153 MSG_SMB_UNLOCK, &data_blob_null);
1157 contend_level2_oplocks_end(br_lck->fsp, LEVEL2_CONTEND_WINDOWS_BRL);
1158 return True;
1161 /****************************************************************************
1162 Unlock a range of bytes - POSIX semantics.
1163 ****************************************************************************/
1165 static bool brl_unlock_posix(struct messaging_context *msg_ctx,
1166 struct byte_range_lock *br_lck,
1167 struct lock_struct *plock)
1169 unsigned int i, j, count;
1170 struct lock_struct *tp;
1171 struct lock_struct *locks = br_lck->lock_data;
1172 bool overlap_found = False;
1174 /* No zero-zero locks for POSIX. */
1175 if (plock->start == 0 && plock->size == 0) {
1176 return False;
1179 /* Don't allow 64-bit lock wrap. */
1180 if (plock->start + plock->size < plock->start ||
1181 plock->start + plock->size < plock->size) {
1182 DEBUG(10,("brl_unlock_posix: lock wrap\n"));
1183 return False;
1186 /* The worst case scenario here is we have to split an
1187 existing POSIX lock range into two, so we need at most
1188 1 more entry. */
1190 tp = talloc_array(br_lck, struct lock_struct, br_lck->num_locks + 1);
1191 if (!tp) {
1192 DEBUG(10,("brl_unlock_posix: malloc fail\n"));
1193 return False;
1196 count = 0;
1197 for (i = 0; i < br_lck->num_locks; i++) {
1198 struct lock_struct *lock = &locks[i];
1199 unsigned int tmp_count;
1201 /* Only remove our own locks - ignore fnum. */
1202 if (IS_PENDING_LOCK(lock->lock_type) ||
1203 !brl_same_context(&lock->context, &plock->context)) {
1204 memcpy(&tp[count], lock, sizeof(struct lock_struct));
1205 count++;
1206 continue;
1209 if (lock->lock_flav == WINDOWS_LOCK) {
1210 /* Do any Windows flavour locks conflict ? */
1211 if (brl_conflict(lock, plock)) {
1212 TALLOC_FREE(tp);
1213 return false;
1215 /* Just copy the Windows lock into the new array. */
1216 memcpy(&tp[count], lock, sizeof(struct lock_struct));
1217 count++;
1218 continue;
1221 /* Work out overlaps. */
1222 tmp_count = brlock_posix_split_merge(&tp[count], lock, plock);
1224 if (tmp_count == 0) {
1225 /* plock overlapped the existing lock completely,
1226 or replaced it. Don't copy the existing lock. */
1227 overlap_found = true;
1228 } else if (tmp_count == 1) {
1229 /* Either no overlap, (simple copy of existing lock) or
1230 * an overlap of an existing lock. */
1231 /* If the lock changed size, we had an overlap. */
1232 if (tp[count].size != lock->size) {
1233 overlap_found = true;
1235 count += tmp_count;
1236 } else if (tmp_count == 2) {
1237 /* We split a lock range in two. */
1238 overlap_found = true;
1239 count += tmp_count;
1241 /* Optimisation... */
1242 /* We know we're finished here as we can't overlap any
1243 more POSIX locks. Copy the rest of the lock array. */
1245 if (i < br_lck->num_locks - 1) {
1246 memcpy(&tp[count], &locks[i+1],
1247 sizeof(*locks)*((br_lck->num_locks-1) - i));
1248 count += ((br_lck->num_locks-1) - i);
1250 break;
1255 if (!overlap_found) {
1256 /* Just ignore - no change. */
1257 TALLOC_FREE(tp);
1258 DEBUG(10,("brl_unlock_posix: No overlap - unlocked.\n"));
1259 return True;
1262 /* Unlock any POSIX regions. */
1263 if(lp_posix_locking(br_lck->fsp->conn->params)) {
1264 release_posix_lock_posix_flavour(br_lck->fsp,
1265 plock->start,
1266 plock->size,
1267 &plock->context,
1269 count);
1272 /* Realloc so we don't leak entries per unlock call. */
1273 if (count) {
1274 tp = talloc_realloc(br_lck, tp, struct lock_struct, count);
1275 if (!tp) {
1276 DEBUG(10,("brl_unlock_posix: realloc fail\n"));
1277 return False;
1279 } else {
1280 /* We deleted the last lock. */
1281 TALLOC_FREE(tp);
1282 tp = NULL;
1285 contend_level2_oplocks_end(br_lck->fsp,
1286 LEVEL2_CONTEND_POSIX_BRL);
1288 br_lck->num_locks = count;
1289 TALLOC_FREE(br_lck->lock_data);
1290 locks = tp;
1291 br_lck->lock_data = tp;
1292 br_lck->modified = True;
1294 /* Send unlock messages to any pending waiters that overlap. */
1296 for (j=0; j < br_lck->num_locks; j++) {
1297 struct lock_struct *pend_lock = &locks[j];
1299 /* Ignore non-pending locks. */
1300 if (!IS_PENDING_LOCK(pend_lock->lock_type)) {
1301 continue;
1304 /* We could send specific lock info here... */
1305 if (brl_pending_overlap(plock, pend_lock)) {
1306 DEBUG(10,("brl_unlock: sending unlock message to pid %s\n",
1307 procid_str_static(&pend_lock->context.pid )));
1309 messaging_send(msg_ctx, pend_lock->context.pid,
1310 MSG_SMB_UNLOCK, &data_blob_null);
1314 return True;
1317 bool smb_vfs_call_brl_unlock_windows(struct vfs_handle_struct *handle,
1318 struct messaging_context *msg_ctx,
1319 struct byte_range_lock *br_lck,
1320 const struct lock_struct *plock)
1322 VFS_FIND(brl_unlock_windows);
1323 return handle->fns->brl_unlock_windows_fn(handle, msg_ctx, br_lck,
1324 plock);
1327 /****************************************************************************
1328 Unlock a range of bytes.
1329 ****************************************************************************/
1331 bool brl_unlock(struct messaging_context *msg_ctx,
1332 struct byte_range_lock *br_lck,
1333 uint64_t smblctx,
1334 struct server_id pid,
1335 br_off start,
1336 br_off size,
1337 enum brl_flavour lock_flav)
1339 struct lock_struct lock;
1341 lock.context.smblctx = smblctx;
1342 lock.context.pid = pid;
1343 lock.context.tid = br_lck->fsp->conn->cnum;
1344 lock.start = start;
1345 lock.size = size;
1346 lock.fnum = br_lck->fsp->fnum;
1347 lock.lock_type = UNLOCK_LOCK;
1348 lock.lock_flav = lock_flav;
1350 if (lock_flav == WINDOWS_LOCK) {
1351 return SMB_VFS_BRL_UNLOCK_WINDOWS(br_lck->fsp->conn, msg_ctx,
1352 br_lck, &lock);
1353 } else {
1354 return brl_unlock_posix(msg_ctx, br_lck, &lock);
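/* Editor's call sketch, not part of the upstream file: release the
   range taken in the brl_lock() sketch above. Owner, start, size and
   flavour must match the existing lock exactly. */
#if 0
        bool ok = brl_unlock(msg_ctx, br_lck, smblctx, self,
                             0,                 /* start */
                             4,                 /* size */
                             WINDOWS_LOCK);
#endif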
1358 /****************************************************************************
1359 Test if we could add a lock if we wanted to.
1360 Returns True if the region required is currently unlocked, False if locked.
1361 ****************************************************************************/
1363 bool brl_locktest(struct byte_range_lock *br_lck,
1364 const struct lock_struct *rw_probe)
1366 bool ret = True;
1367 unsigned int i;
1368 struct lock_struct *locks = br_lck->lock_data;
1369 files_struct *fsp = br_lck->fsp;
1371 /* Make sure existing locks don't conflict */
1372 for (i=0; i < br_lck->num_locks; i++) {
1374 * Our own locks don't conflict.
1376 if (brl_conflict_other(&locks[i], rw_probe)) {
1377 if (br_lck->record == NULL) {
1378 /* readonly */
1379 return false;
1382 if (!serverid_exists(&locks[i].context.pid)) {
1383 locks[i].context.pid.pid = 0;
1384 br_lck->modified = true;
1385 continue;
1388 return False;
1393 * There is no lock held by an SMB daemon, check to
1394 * see if there is a POSIX lock from a UNIX or NFS process.
1395 * This only conflicts with Windows locks, not POSIX locks.
1398 if(lp_posix_locking(fsp->conn->params) &&
1399 (rw_probe->lock_flav == WINDOWS_LOCK)) {
1401 * Make copies -- is_posix_locked might modify the values
1404 br_off start = rw_probe->start;
1405 br_off size = rw_probe->size;
1406 enum brl_type lock_type = rw_probe->lock_type;
1408 ret = is_posix_locked(fsp, &start, &size, &lock_type, WINDOWS_LOCK);
1410 DEBUG(10, ("brl_locktest: posix start=%ju len=%ju %s for %s "
1411 "file %s\n", (uintmax_t)start, (uintmax_t)size,
1412 ret ? "locked" : "unlocked",
1413 fsp_fnum_dbg(fsp), fsp_str_dbg(fsp)));
1415 /* We need to return the inverse of is_posix_locked. */
1416 ret = !ret;
1419 /* no conflicts - we could have added it */
1420 return ret;
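/* Editor's usage sketch, not part of the upstream file: probe whether
   a read of [off, off+len) would be blocked before issuing the pread.
   off, len, smblctx and self are assumed caller-side variables. */
#if 0
        struct lock_struct probe = {
                .context.smblctx = smblctx,
                .context.pid = self,
                .context.tid = fsp->conn->cnum,
                .start = off,
                .size = len,
                .fnum = fsp->fnum,
                .lock_type = READ_LOCK,
                .lock_flav = WINDOWS_LOCK,
        };

        if (!brl_locktest(br_lck, &probe)) {
                /* region is locked against us - fail the read */
        }
#endif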
1423 /****************************************************************************
1424 Query for existing locks.
1425 ****************************************************************************/
1427 NTSTATUS brl_lockquery(struct byte_range_lock *br_lck,
1428 uint64_t *psmblctx,
1429 struct server_id pid,
1430 br_off *pstart,
1431 br_off *psize,
1432 enum brl_type *plock_type,
1433 enum brl_flavour lock_flav)
1435 unsigned int i;
1436 struct lock_struct lock;
1437 const struct lock_struct *locks = br_lck->lock_data;
1438 files_struct *fsp = br_lck->fsp;
1440 lock.context.smblctx = *psmblctx;
1441 lock.context.pid = pid;
1442 lock.context.tid = br_lck->fsp->conn->cnum;
1443 lock.start = *pstart;
1444 lock.size = *psize;
1445 lock.fnum = fsp->fnum;
1446 lock.lock_type = *plock_type;
1447 lock.lock_flav = lock_flav;
1449 /* Make sure existing locks don't conflict */
1450 for (i=0; i < br_lck->num_locks; i++) {
1451 const struct lock_struct *exlock = &locks[i];
1452 bool conflict = False;
1454 if (exlock->lock_flav == WINDOWS_LOCK) {
1455 conflict = brl_conflict(exlock, &lock);
1456 } else {
1457 conflict = brl_conflict_posix(exlock, &lock);
1460 if (conflict) {
1461 *psmblctx = exlock->context.smblctx;
1462 *pstart = exlock->start;
1463 *psize = exlock->size;
1464 *plock_type = exlock->lock_type;
1465 return NT_STATUS_LOCK_NOT_GRANTED;
1470 * There is no lock held by an SMB daemon, check to
1471 * see if there is a POSIX lock from a UNIX or NFS process.
1474 if(lp_posix_locking(fsp->conn->params)) {
1475 bool ret = is_posix_locked(fsp, pstart, psize, plock_type, POSIX_LOCK);
1477 DEBUG(10, ("brl_lockquery: posix start=%ju len=%ju %s for %s "
1478 "file %s\n", (uintmax_t)*pstart,
1479 (uintmax_t)*psize, ret ? "locked" : "unlocked",
1480 fsp_fnum_dbg(fsp), fsp_str_dbg(fsp)));
1482 if (ret) {
1483 /* Hmmm. No clue what to set smblctx to - use -1. */
1484 *psmblctx = 0xFFFFFFFFFFFFFFFFLL;
1485 return NT_STATUS_LOCK_NOT_GRANTED;
1489 return NT_STATUS_OK;
1493 bool smb_vfs_call_brl_cancel_windows(struct vfs_handle_struct *handle,
1494 struct byte_range_lock *br_lck,
1495 struct lock_struct *plock)
1497 VFS_FIND(brl_cancel_windows);
1498 return handle->fns->brl_cancel_windows_fn(handle, br_lck, plock);
1501 /****************************************************************************
1502 Remove a particular pending lock.
1503 ****************************************************************************/
1504 bool brl_lock_cancel(struct byte_range_lock *br_lck,
1505 uint64_t smblctx,
1506 struct server_id pid,
1507 br_off start,
1508 br_off size,
1509 enum brl_flavour lock_flav)
1511 bool ret;
1512 struct lock_struct lock;
1514 lock.context.smblctx = smblctx;
1515 lock.context.pid = pid;
1516 lock.context.tid = br_lck->fsp->conn->cnum;
1517 lock.start = start;
1518 lock.size = size;
1519 lock.fnum = br_lck->fsp->fnum;
1520 lock.lock_flav = lock_flav;
1521 /* lock.lock_type doesn't matter */
1523 if (lock_flav == WINDOWS_LOCK) {
1524 ret = SMB_VFS_BRL_CANCEL_WINDOWS(br_lck->fsp->conn, br_lck,
1525 &lock);
1526 } else {
1527 ret = brl_lock_cancel_default(br_lck, &lock);
1530 return ret;
1533 bool brl_lock_cancel_default(struct byte_range_lock *br_lck,
1534 struct lock_struct *plock)
1536 unsigned int i;
1537 struct lock_struct *locks = br_lck->lock_data;
1539 SMB_ASSERT(plock);
1541 for (i = 0; i < br_lck->num_locks; i++) {
1542 struct lock_struct *lock = &locks[i];
1544 /* For pending locks we *always* care about the fnum. */
1545 if (brl_same_context(&lock->context, &plock->context) &&
1546 lock->fnum == plock->fnum &&
1547 IS_PENDING_LOCK(lock->lock_type) &&
1548 lock->lock_flav == plock->lock_flav &&
1549 lock->start == plock->start &&
1550 lock->size == plock->size) {
1551 break;
1555 if (i == br_lck->num_locks) {
1556 /* Didn't find it. */
1557 return False;
1560 brl_delete_lock_struct(locks, br_lck->num_locks, i);
1561 br_lck->num_locks -= 1;
1562 br_lck->modified = True;
1563 return True;
1566 /****************************************************************************
1567 Remove any locks associated with an open file.
1568 Walks a copy of the lock array and calls brl_unlock() on every lock
1569 that matches this process, tid and fnum.
1570 ****************************************************************************/
1572 void brl_close_fnum(struct messaging_context *msg_ctx,
1573 struct byte_range_lock *br_lck)
1575 files_struct *fsp = br_lck->fsp;
1576 uint32_t tid = fsp->conn->cnum;
1577 uint64_t fnum = fsp->fnum;
1578 unsigned int i;
1579 struct lock_struct *locks = br_lck->lock_data;
1580 struct server_id pid = messaging_server_id(fsp->conn->sconn->msg_ctx);
1581 struct lock_struct *locks_copy;
1582 unsigned int num_locks_copy;
1584 /* Copy the current lock array. */
1585 if (br_lck->num_locks) {
1586 locks_copy = (struct lock_struct *)talloc_memdup(br_lck, locks, br_lck->num_locks * sizeof(struct lock_struct));
1587 if (!locks_copy) {
1588 smb_panic("brl_close_fnum: talloc failed");
1590 } else {
1591 locks_copy = NULL;
1594 num_locks_copy = br_lck->num_locks;
1596 for (i=0; i < num_locks_copy; i++) {
1597 struct lock_struct *lock = &locks_copy[i];
1599 if (lock->context.tid == tid && serverid_equal(&lock->context.pid, &pid) &&
1600 (lock->fnum == fnum)) {
1601 brl_unlock(msg_ctx,
1602 br_lck,
1603 lock->context.smblctx,
1604 pid,
1605 lock->start,
1606 lock->size,
1607 lock->lock_flav);
1612 bool brl_mark_disconnected(struct files_struct *fsp)
1614 uint32_t tid = fsp->conn->cnum;
1615 uint64_t smblctx;
1616 uint64_t fnum = fsp->fnum;
1617 unsigned int i;
1618 struct server_id self = messaging_server_id(fsp->conn->sconn->msg_ctx);
1619 struct byte_range_lock *br_lck = NULL;
1621 if (fsp->op == NULL) {
1622 return false;
1625 smblctx = fsp->op->global->open_persistent_id;
1627 if (!fsp->op->global->durable) {
1628 return false;
1631 if (fsp->current_lock_count == 0) {
1632 return true;
1635 br_lck = brl_get_locks(talloc_tos(), fsp);
1636 if (br_lck == NULL) {
1637 return false;
1640 for (i=0; i < br_lck->num_locks; i++) {
1641 struct lock_struct *lock = &br_lck->lock_data[i];
1644 * as this is a durable handle, we only expect locks
1645 * of the current file handle!
1648 if (lock->context.smblctx != smblctx) {
1649 TALLOC_FREE(br_lck);
1650 return false;
1653 if (lock->context.tid != tid) {
1654 TALLOC_FREE(br_lck);
1655 return false;
1658 if (!serverid_equal(&lock->context.pid, &self)) {
1659 TALLOC_FREE(br_lck);
1660 return false;
1663 if (lock->fnum != fnum) {
1664 TALLOC_FREE(br_lck);
1665 return false;
1668 server_id_set_disconnected(&lock->context.pid);
1669 lock->context.tid = TID_FIELD_INVALID;
1670 lock->fnum = FNUM_FIELD_INVALID;
1673 br_lck->modified = true;
1674 TALLOC_FREE(br_lck);
1675 return true;
1678 bool brl_reconnect_disconnected(struct files_struct *fsp)
1680 uint32_t tid = fsp->conn->cnum;
1681 uint64_t smblctx;
1682 uint64_t fnum = fsp->fnum;
1683 unsigned int i;
1684 struct server_id self = messaging_server_id(fsp->conn->sconn->msg_ctx);
1685 struct byte_range_lock *br_lck = NULL;
1687 if (fsp->op == NULL) {
1688 return false;
1691 smblctx = fsp->op->global->open_persistent_id;
1693 if (!fsp->op->global->durable) {
1694 return false;
1698 * When reconnecting, we do not want to validate the brlock entries
1699 * and thereby remove our own (disconnected) entries; instead we
1700 * reactivate them.
1703 br_lck = brl_get_locks(talloc_tos(), fsp);
1704 if (br_lck == NULL) {
1705 return false;
1708 if (br_lck->num_locks == 0) {
1709 TALLOC_FREE(br_lck);
1710 return true;
1713 for (i=0; i < br_lck->num_locks; i++) {
1714 struct lock_struct *lock = &br_lck->lock_data[i];
1717 * as this is a durable handle we only expect locks
1718 * of the current file handle!
1721 if (lock->context.smblctx != smblctx) {
1722 TALLOC_FREE(br_lck);
1723 return false;
1726 if (lock->context.tid != TID_FIELD_INVALID) {
1727 TALLOC_FREE(br_lck);
1728 return false;
1731 if (!server_id_is_disconnected(&lock->context.pid)) {
1732 TALLOC_FREE(br_lck);
1733 return false;
1736 if (lock->fnum != FNUM_FIELD_INVALID) {
1737 TALLOC_FREE(br_lck);
1738 return false;
1741 lock->context.pid = self;
1742 lock->context.tid = tid;
1743 lock->fnum = fnum;
1746 fsp->current_lock_count = br_lck->num_locks;
1747 br_lck->modified = true;
1748 TALLOC_FREE(br_lck);
1749 return true;
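/* Editor's sketch, not part of the upstream file: the durable handle
   hooks above in their typical sequence. fsp is assumed to carry a
   durable open. */
#if 0
        /* connection dies: stamp our brl entries as disconnected */
        if (!brl_mark_disconnected(fsp)) {
                /* entries could not be marked - durable resume unsafe */
        }

        /* later, in the reconnecting smbd: re-own the entries */
        if (!brl_reconnect_disconnected(fsp)) {
                /* entries stale or foreign - refuse the resume */
        }
#endif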
1752 struct brl_forall_cb {
1753 void (*fn)(struct file_id id, struct server_id pid,
1754 enum brl_type lock_type,
1755 enum brl_flavour lock_flav,
1756 br_off start, br_off size,
1757 void *private_data);
1758 void *private_data;
1761 /****************************************************************************
1762 Traverse the whole database with this function, calling traverse_callback
1763 on each lock.
1764 ****************************************************************************/
1766 static int brl_traverse_fn(struct db_record *rec, void *state)
1768 struct brl_forall_cb *cb = (struct brl_forall_cb *)state;
1769 struct lock_struct *locks;
1770 struct file_id *key;
1771 unsigned int i;
1772 unsigned int num_locks = 0;
1773 TDB_DATA dbkey;
1774 TDB_DATA value;
1776 dbkey = dbwrap_record_get_key(rec);
1777 value = dbwrap_record_get_value(rec);
1779 /* In a traverse function we must make a copy of
1780 dbuf before modifying it. */
1782 locks = (struct lock_struct *)talloc_memdup(
1783 talloc_tos(), value.dptr, value.dsize);
1784 if (!locks) {
1785 return -1; /* Terminate traversal. */
1788 key = (struct file_id *)dbkey.dptr;
1789 num_locks = value.dsize/sizeof(*locks);
1791 if (cb->fn) {
1792 for ( i=0; i<num_locks; i++) {
1793 cb->fn(*key,
1794 locks[i].context.pid,
1795 locks[i].lock_type,
1796 locks[i].lock_flav,
1797 locks[i].start,
1798 locks[i].size,
1799 cb->private_data);
1803 TALLOC_FREE(locks);
1804 return 0;
1807 /*******************************************************************
1808 Call the specified function on each lock in the database.
1809 ********************************************************************/
1811 int brl_forall(void (*fn)(struct file_id id, struct server_id pid,
1812 enum brl_type lock_type,
1813 enum brl_flavour lock_flav,
1814 br_off start, br_off size,
1815 void *private_data),
1816 void *private_data)
1818 struct brl_forall_cb cb;
1819 NTSTATUS status;
1820 int count = 0;
1822 if (!brlock_db) {
1823 return 0;
1825 cb.fn = fn;
1826 cb.private_data = private_data;
1827 status = dbwrap_traverse(brlock_db, brl_traverse_fn, &cb, &count);
1829 if (!NT_STATUS_IS_OK(status)) {
1830 return -1;
1831 } else {
1832 return count;
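/* Editor's usage sketch, not part of the upstream file: dump every
   byte range lock in the database. The callback name is invented. */
#if 0
static void print_one_lock(struct file_id id, struct server_id pid,
                           enum brl_type lock_type,
                           enum brl_flavour lock_flav,
                           br_off start, br_off size,
                           void *private_data)
{
        DEBUG(0, ("%s lock: start=%ju size=%ju\n",
                  lock_flav_name(lock_flav),
                  (uintmax_t)start, (uintmax_t)size));
}

static void dump_all_locks(void)
{
        int n = brl_forall(print_one_lock, NULL);
        DEBUG(0, ("%d locks visited\n", n));
}
#endif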
1836 /*******************************************************************
1837 Store a potentially modified set of byte range lock data back into
1838 the database.
1839 Unlock the record.
1840 ********************************************************************/
1842 static void byte_range_lock_flush(struct byte_range_lock *br_lck)
1844 size_t data_len;
1845 unsigned i;
1846 struct lock_struct *locks = br_lck->lock_data;
1848 if (!br_lck->modified) {
1849 DEBUG(10, ("br_lck not modified\n"));
1850 goto done;
1853 i = 0;
1855 while (i < br_lck->num_locks) {
1856 if (locks[i].context.pid.pid == 0) {
1858 * Autocleanup, the process conflicted and does not
1859 * exist anymore.
1861 locks[i] = locks[br_lck->num_locks-1];
1862 br_lck->num_locks -= 1;
1863 } else {
1864 i += 1;
1868 data_len = br_lck->num_locks * sizeof(struct lock_struct);
1870 if (br_lck->have_read_oplocks) {
1871 data_len += 1;
1874 DEBUG(10, ("data_len=%d\n", (int)data_len));
1876 if (data_len == 0) {
1877 /* No locks - delete this entry. */
1878 NTSTATUS status = dbwrap_record_delete(br_lck->record);
1879 if (!NT_STATUS_IS_OK(status)) {
1880 DEBUG(0, ("delete_rec returned %s\n",
1881 nt_errstr(status)));
1882 smb_panic("Could not delete byte range lock entry");
1884 } else {
1885 TDB_DATA data;
1886 NTSTATUS status;
1888 data.dsize = data_len;
1889 data.dptr = talloc_array(talloc_tos(), uint8_t, data_len);
1890 SMB_ASSERT(data.dptr != NULL);
1892 memcpy(data.dptr, br_lck->lock_data,
1893 br_lck->num_locks * sizeof(struct lock_struct));
1895 if (br_lck->have_read_oplocks) {
1896 data.dptr[data_len-1] = 1;
1899 status = dbwrap_record_store(br_lck->record, data, TDB_REPLACE);
1900 TALLOC_FREE(data.dptr);
1901 if (!NT_STATUS_IS_OK(status)) {
1902 DEBUG(0, ("store returned %s\n", nt_errstr(status)));
1903 smb_panic("Could not store byte range mode entry");
1907 DEBUG(10, ("seqnum=%d\n", dbwrap_get_seqnum(brlock_db)));
1909 done:
1910 br_lck->modified = false;
1911 TALLOC_FREE(br_lck->record);
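/* Editor's note, not part of the upstream file: the record stored
   above is the packed lock_struct array, optionally followed by one
   trailing byte carrying have_read_oplocks. brl_get_locks() below
   recognises the flag byte because it makes
   dsize % sizeof(struct lock_struct) equal to 1. */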
1914 static int byte_range_lock_destructor(struct byte_range_lock *br_lck)
1916 byte_range_lock_flush(br_lck);
1917 return 0;
1920 /*******************************************************************
1921 Fetch a set of byte range lock data from the database.
1922 Leave the record locked.
1923 TALLOC_FREE(brl) will release the lock in the destructor.
1924 ********************************************************************/
1926 struct byte_range_lock *brl_get_locks(TALLOC_CTX *mem_ctx, files_struct *fsp)
1928 TDB_DATA key, data;
1929 struct byte_range_lock *br_lck = talloc(mem_ctx, struct byte_range_lock);
1931 if (br_lck == NULL) {
1932 return NULL;
1935 br_lck->fsp = fsp;
1936 br_lck->num_locks = 0;
1937 br_lck->have_read_oplocks = false;
1938 br_lck->modified = False;
1940 key.dptr = (uint8 *)&fsp->file_id;
1941 key.dsize = sizeof(struct file_id);
1943 br_lck->record = dbwrap_fetch_locked(brlock_db, br_lck, key);
1945 if (br_lck->record == NULL) {
1946 DEBUG(3, ("Could not lock byte range lock entry\n"));
1947 TALLOC_FREE(br_lck);
1948 return NULL;
1951 data = dbwrap_record_get_value(br_lck->record);
1953 br_lck->lock_data = NULL;
1955 talloc_set_destructor(br_lck, byte_range_lock_destructor);
1957 br_lck->num_locks = data.dsize / sizeof(struct lock_struct);
1959 if (br_lck->num_locks != 0) {
1960 br_lck->lock_data = talloc_array(
1961 br_lck, struct lock_struct, br_lck->num_locks);
1962 if (br_lck->lock_data == NULL) {
1963 DEBUG(0, ("malloc failed\n"));
1964 TALLOC_FREE(br_lck);
1965 return NULL;
1968 memcpy(br_lck->lock_data, data.dptr,
1969 talloc_get_size(br_lck->lock_data));
1972 DEBUG(10, ("data.dsize=%d\n", (int)data.dsize));
1974 if ((data.dsize % sizeof(struct lock_struct)) == 1) {
1975 br_lck->have_read_oplocks = (data.dptr[data.dsize-1] == 1);
1978 if (DEBUGLEVEL >= 10) {
1979 unsigned int i;
1980 struct lock_struct *locks = br_lck->lock_data;
1981 DEBUG(10,("brl_get_locks_internal: %u current locks on file_id %s\n",
1982 br_lck->num_locks,
1983 file_id_string_tos(&fsp->file_id)));
1984 for( i = 0; i < br_lck->num_locks; i++) {
1985 print_lock_struct(i, &locks[i]);
1989 return br_lck;
1992 struct brl_get_locks_readonly_state {
1993 TALLOC_CTX *mem_ctx;
1994 struct byte_range_lock **br_lock;
1997 static void brl_get_locks_readonly_parser(TDB_DATA key, TDB_DATA data,
1998 void *private_data)
2000 struct brl_get_locks_readonly_state *state =
2001 (struct brl_get_locks_readonly_state *)private_data;
2002 struct byte_range_lock *br_lock;
2004 br_lock = talloc_pooled_object(
2005 state->mem_ctx, struct byte_range_lock, 1, data.dsize);
2006 if (br_lock == NULL) {
2007 *state->br_lock = NULL;
2008 return;
2010 br_lock->lock_data = (struct lock_struct *)talloc_memdup(
2011 br_lock, data.dptr, data.dsize);
2012 br_lock->num_locks = data.dsize / sizeof(struct lock_struct);
2014 if ((data.dsize % sizeof(struct lock_struct)) == 1) {
2015 br_lock->have_read_oplocks = (data.dptr[data.dsize-1] == 1);
2016 } else {
2017 br_lock->have_read_oplocks = false;
2020 DEBUG(10, ("Got %d bytes, have_read_oplocks: %s\n", (int)data.dsize,
2021 br_lock->have_read_oplocks ? "true" : "false"));
2023 *state->br_lock = br_lock;
2026 struct byte_range_lock *brl_get_locks_readonly(files_struct *fsp)
2028 struct byte_range_lock *br_lock = NULL;
2029 struct brl_get_locks_readonly_state state;
2030 NTSTATUS status;
2032 DEBUG(10, ("seqnum=%d, fsp->brlock_seqnum=%d\n",
2033 dbwrap_get_seqnum(brlock_db), fsp->brlock_seqnum));
2035 if ((fsp->brlock_rec != NULL)
2036 && (dbwrap_get_seqnum(brlock_db) == fsp->brlock_seqnum)) {
2038 * We have cached the brlock_rec and the database did not
2039 * change.
2041 return fsp->brlock_rec;
2045 * Parse the record fresh from the database
2048 state.mem_ctx = fsp;
2049 state.br_lock = &br_lock;
2051 status = dbwrap_parse_record(
2052 brlock_db,
2053 make_tdb_data((uint8_t *)&fsp->file_id,
2054 sizeof(fsp->file_id)),
2055 brl_get_locks_readonly_parser, &state);
2057 if (NT_STATUS_EQUAL(status,NT_STATUS_NOT_FOUND)) {
2059 * No locks on this file. Return an empty br_lock.
2061 br_lock = talloc(fsp, struct byte_range_lock);
2062 if (br_lock == NULL) {
2063 return NULL;
2066 br_lock->have_read_oplocks = false;
2067 br_lock->num_locks = 0;
2068 br_lock->lock_data = NULL;
2070 } else if (!NT_STATUS_IS_OK(status)) {
2071 DEBUG(3, ("Could not parse byte range lock record: "
2072 "%s\n", nt_errstr(status)));
2073 return NULL;
2075 if (br_lock == NULL) {
2076 return NULL;
2079 br_lock->fsp = fsp;
2080 br_lock->modified = false;
2081 br_lock->record = NULL;
2083 if (lp_clustering()) {
2085 * In the cluster case we can't cache the brlock struct
2086 * because dbwrap_get_seqnum does not work reliably over
2087 * ctdb. Thus we have to throw away the brlock struct soon.
2089 talloc_steal(talloc_tos(), br_lock);
2090 } else {
2092 * Cache the brlock struct, invalidated when the dbwrap_seqnum
2093 * changes. See beginning of this routine.
2095 TALLOC_FREE(fsp->brlock_rec);
2096 fsp->brlock_rec = br_lock;
2097 fsp->brlock_seqnum = dbwrap_get_seqnum(brlock_db);
2100 return br_lock;
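/* Editor's note, not part of the upstream file: this cache is sound
   because brl_init() opens the database with TDB_SEQNUM in the
   non-clustered case; every write bumps the sequence number, so a
   stale fsp->brlock_rec fails the seqnum comparison at the top of
   brl_get_locks_readonly() and is re-read. */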
2103 struct brl_revalidate_state {
2104 ssize_t array_size;
2105 uint32 num_pids;
2106 struct server_id *pids;
2110 * Collect PIDs of all processes with pending entries
2113 static void brl_revalidate_collect(struct file_id id, struct server_id pid,
2114 enum brl_type lock_type,
2115 enum brl_flavour lock_flav,
2116 br_off start, br_off size,
2117 void *private_data)
2119 struct brl_revalidate_state *state =
2120 (struct brl_revalidate_state *)private_data;
2122 if (!IS_PENDING_LOCK(lock_type)) {
2123 return;
2126 add_to_large_array(state, sizeof(pid), (void *)&pid,
2127 &state->pids, &state->num_pids,
2128 &state->array_size);
2132 * qsort callback to sort the processes
2135 static int compare_procids(const void *p1, const void *p2)
2137 const struct server_id *i1 = (const struct server_id *)p1;
2138 const struct server_id *i2 = (const struct server_id *)p2;
2140 if (i1->pid < i2->pid) return -1;
2141 if (i1->pid > i2->pid) return 1;
2142 return 0;
2146 * Send a MSG_SMB_UNLOCK message to all processes with pending byte range
2147 * locks so that they retry. Mainly used in the cluster code after a node has
2148 * died.
2150 * Done in two steps to avoid double-sends: First we collect all entries in an
2151 * array, then qsort that array and only send to non-dupes.
2154 void brl_revalidate(struct messaging_context *msg_ctx,
2155 void *private_data,
2156 uint32_t msg_type,
2157 struct server_id server_id,
2158 DATA_BLOB *data)
2160 struct brl_revalidate_state *state;
2161 uint32 i;
2162 struct server_id last_pid;
2164 if (!(state = talloc_zero(NULL, struct brl_revalidate_state))) {
2165 DEBUG(0, ("talloc failed\n"));
2166 return;
2169 brl_forall(brl_revalidate_collect, state);
2171 if (state->array_size == -1) {
2172 DEBUG(0, ("talloc failed\n"));
2173 goto done;
2176 if (state->num_pids == 0) {
2177 goto done;
2180 TYPESAFE_QSORT(state->pids, state->num_pids, compare_procids);
2182 ZERO_STRUCT(last_pid);
2184 for (i=0; i<state->num_pids; i++) {
2185 if (serverid_equal(&last_pid, &state->pids[i])) {
2187 * We've seen that one already
2189 continue;
2192 messaging_send(msg_ctx, state->pids[i], MSG_SMB_UNLOCK,
2193 &data_blob_null);
2194 last_pid = state->pids[i];
2197 done:
2198 TALLOC_FREE(state);
2199 return;
2202 bool brl_cleanup_disconnected(struct file_id fid, uint64_t open_persistent_id)
2204 bool ret = false;
2205 TALLOC_CTX *frame = talloc_stackframe();
2206 TDB_DATA key, val;
2207 struct db_record *rec;
2208 struct lock_struct *lock;
2209 unsigned n, num;
2210 NTSTATUS status;
2212 key = make_tdb_data((void*)&fid, sizeof(fid));
2214 rec = dbwrap_fetch_locked(brlock_db, frame, key);
2215 if (rec == NULL) {
2216 DEBUG(5, ("brl_cleanup_disconnected: failed to fetch record "
2217 "for file %s\n", file_id_string(frame, &fid)));
2218 goto done;
2221 val = dbwrap_record_get_value(rec);
2222 lock = (struct lock_struct*)val.dptr;
2223 num = val.dsize / sizeof(struct lock_struct);
2224 if (lock == NULL) {
2225 DEBUG(10, ("brl_cleanup_disconnected: no byte range locks for "
2226 "file %s\n", file_id_string(frame, &fid)));
2227 ret = true;
2228 goto done;
2231 for (n=0; n<num; n++) {
2232 struct lock_context *ctx = &lock[n].context;
2234 if (!server_id_is_disconnected(&ctx->pid)) {
2235 DEBUG(5, ("brl_cleanup_disconnected: byte range lock "
2236 "%s used by server %s, do not cleanup\n",
2237 file_id_string(frame, &fid),
2238 server_id_str(frame, &ctx->pid)));
2239 goto done;
2242 if (ctx->smblctx != open_persistent_id) {
2243 DEBUG(5, ("brl_cleanup_disconnected: byte range lock "
2244 "%s expected smblctx %llu but found %llu"
2245 ", do not cleanup\n",
2246 file_id_string(frame, &fid),
2247 (unsigned long long)open_persistent_id,
2248 (unsigned long long)ctx->smblctx));
2249 goto done;
2253 status = dbwrap_record_delete(rec);
2254 if (!NT_STATUS_IS_OK(status)) {
2255 DEBUG(5, ("brl_cleanup_disconnected: failed to delete record "
2256 "for file %s from %s, open %llu: %s\n",
2257 file_id_string(frame, &fid), dbwrap_name(brlock_db),
2258 (unsigned long long)open_persistent_id,
2259 nt_errstr(status)));
2260 goto done;
2263 DEBUG(10, ("brl_cleanup_disconnected: "
2264 "file %s cleaned up %u entries from open %llu\n",
2265 file_id_string(frame, &fid), num,
2266 (unsigned long long)open_persistent_id));
2268 ret = true;
2269 done:
2270 talloc_free(frame);
2271 return ret;