smbd: Move "struct byte_range_lock" definition to brlock.c
source3/locking/brlock.c
1 /*
2 Unix SMB/CIFS implementation.
3 byte range locking code
4 Updated to handle range splits/merges.
6 Copyright (C) Andrew Tridgell 1992-2000
7 Copyright (C) Jeremy Allison 1992-2000
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3 of the License, or
12 (at your option) any later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program. If not, see <http://www.gnu.org/licenses/>.
23 /* This module implements a tdb based byte range locking service,
24 replacing the fcntl() based byte range locking previously
25 used. This allows us to provide the same semantics as NT */
27 #include "includes.h"
28 #include "system/filesys.h"
29 #include "locking/proto.h"
30 #include "smbd/globals.h"
31 #include "dbwrap/dbwrap.h"
32 #include "dbwrap/dbwrap_open.h"
33 #include "serverid.h"
34 #include "messages.h"
35 #include "util_tdb.h"
37 #undef DBGC_CLASS
38 #define DBGC_CLASS DBGC_LOCKING
40 #define ZERO_ZERO 0
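/* ZERO_ZERO guards the special-case handling of zero-length locks at offset
   zero (the brl_conflict1() variant and the lock-list sorting under
   #if ZERO_ZERO below). It is compiled out here. */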
42 /* The open brlock.tdb database. */
44 static struct db_context *brlock_db;
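/* In-memory representation of the byte range locks for one open file,
   parsed from (and flushed back to) a brlock.tdb record keyed on the
   file_id. "record" holds the locked database record while the lock array
   may be modified; see brl_get_locks_internal() and
   byte_range_lock_flush(). */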
46 struct byte_range_lock {
47 struct files_struct *fsp;
48 unsigned int num_locks;
49 bool modified;
50 bool read_only;
51 struct file_id key;
52 struct lock_struct *lock_data;
53 struct db_record *record;
56 /****************************************************************************
57 Debug info at level 10 for lock struct.
58 ****************************************************************************/
60 static void print_lock_struct(unsigned int i, const struct lock_struct *pls)
62 DEBUG(10,("[%u]: smblctx = %llu, tid = %u, pid = %s, ",
64 (unsigned long long)pls->context.smblctx,
65 (unsigned int)pls->context.tid,
66 server_id_str(talloc_tos(), &pls->context.pid) ));
68 DEBUG(10,("start = %.0f, size = %.0f, fnum = %llu, %s %s\n",
69 (double)pls->start,
70 (double)pls->size,
71 (unsigned long long)pls->fnum,
72 lock_type_name(pls->lock_type),
73 lock_flav_name(pls->lock_flav) ));
76 unsigned int brl_num_locks(const struct byte_range_lock *brl)
78 return brl->num_locks;
81 struct files_struct *brl_fsp(struct byte_range_lock *brl)
83 return brl->fsp;
86 /****************************************************************************
87 See if two locking contexts are equal.
88 ****************************************************************************/
90 static bool brl_same_context(const struct lock_context *ctx1,
91 const struct lock_context *ctx2)
93 return (serverid_equal(&ctx1->pid, &ctx2->pid) &&
94 (ctx1->smblctx == ctx2->smblctx) &&
95 (ctx1->tid == ctx2->tid));
98 /****************************************************************************
99 See if lck1 and lck2 overlap.
100 ****************************************************************************/
102 static bool brl_overlap(const struct lock_struct *lck1,
103 const struct lock_struct *lck2)
105 /* XXX Remove for Win7 compatibility. */
106 /* this extra check is not redundant - it copes with locks
107 that go beyond the end of 64 bit file space */
108 if (lck1->size != 0 &&
109 lck1->start == lck2->start &&
110 lck1->size == lck2->size) {
111 return True;
114 if (lck1->start >= (lck2->start+lck2->size) ||
115 lck2->start >= (lck1->start+lck1->size)) {
116 return False;
118 return True;
121 /****************************************************************************
122 See if lock2 can be added when lock1 is in place.
123 ****************************************************************************/
125 static bool brl_conflict(const struct lock_struct *lck1,
126 const struct lock_struct *lck2)
128 /* Ignore PENDING locks. */
129 if (IS_PENDING_LOCK(lck1->lock_type) || IS_PENDING_LOCK(lck2->lock_type))
130 return False;
132 /* Read locks never conflict. */
133 if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK) {
134 return False;
137 /* A READ lock can stack on top of a WRITE lock if they have the same
138 * context & fnum. */
139 if (lck1->lock_type == WRITE_LOCK && lck2->lock_type == READ_LOCK &&
140 brl_same_context(&lck1->context, &lck2->context) &&
141 lck1->fnum == lck2->fnum) {
142 return False;
145 return brl_overlap(lck1, lck2);
148 /****************************************************************************
149 See if lock2 can be added when lock1 is in place - when both locks are POSIX
150 flavour. POSIX locks ignore fnum - they only care about dev/ino which we
151 know already match.
152 ****************************************************************************/
154 static bool brl_conflict_posix(const struct lock_struct *lck1,
155 const struct lock_struct *lck2)
157 #if defined(DEVELOPER)
158 SMB_ASSERT(lck1->lock_flav == POSIX_LOCK);
159 SMB_ASSERT(lck2->lock_flav == POSIX_LOCK);
160 #endif
162 /* Ignore PENDING locks. */
163 if (IS_PENDING_LOCK(lck1->lock_type) || IS_PENDING_LOCK(lck2->lock_type))
164 return False;
166 /* Read locks never conflict. */
167 if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK) {
168 return False;
171 /* Locks on the same context don't conflict. Ignore fnum. */
172 if (brl_same_context(&lck1->context, &lck2->context)) {
173 return False;
176 /* One is read, the other write, or the context is different,
177 do they overlap ? */
178 return brl_overlap(lck1, lck2);
181 #if ZERO_ZERO
182 static bool brl_conflict1(const struct lock_struct *lck1,
183 const struct lock_struct *lck2)
185 if (IS_PENDING_LOCK(lck1->lock_type) || IS_PENDING_LOCK(lck2->lock_type))
186 return False;
188 if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK) {
189 return False;
192 if (brl_same_context(&lck1->context, &lck2->context) &&
193 lck2->lock_type == READ_LOCK && lck1->fnum == lck2->fnum) {
194 return False;
197 if (lck2->start == 0 && lck2->size == 0 && lck1->size != 0) {
198 return True;
201 if (lck1->start >= (lck2->start + lck2->size) ||
202 lck2->start >= (lck1->start + lck1->size)) {
203 return False;
206 return True;
208 #endif
210 /****************************************************************************
211 Check to see if this lock conflicts, but ignore our own locks on the
212 same fnum only. This is the read/write lock check code path.
213 This is never used in the POSIX lock case.
214 ****************************************************************************/
216 static bool brl_conflict_other(const struct lock_struct *lck1, const struct lock_struct *lck2)
218 if (IS_PENDING_LOCK(lck1->lock_type) || IS_PENDING_LOCK(lck2->lock_type))
219 return False;
221 if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK)
222 return False;
224 /* POSIX flavour locks never conflict here - this is only called
225 in the read/write path. */
227 if (lck1->lock_flav == POSIX_LOCK && lck2->lock_flav == POSIX_LOCK)
228 return False;
231 * Incoming WRITE locks conflict with existing READ locks even
232 * if the context is the same. JRA. See LOCKTEST7 in smbtorture.
235 if (!(lck2->lock_type == WRITE_LOCK && lck1->lock_type == READ_LOCK)) {
236 if (brl_same_context(&lck1->context, &lck2->context) &&
237 lck1->fnum == lck2->fnum)
238 return False;
241 return brl_overlap(lck1, lck2);
244 /****************************************************************************
245 Check if an unlock overlaps a pending lock.
246 ****************************************************************************/
248 static bool brl_pending_overlap(const struct lock_struct *lock, const struct lock_struct *pend_lock)
250 if ((lock->start <= pend_lock->start) && (lock->start + lock->size > pend_lock->start))
251 return True;
252 if ((lock->start >= pend_lock->start) && (lock->start <= pend_lock->start + pend_lock->size))
253 return True;
254 return False;
257 /****************************************************************************
258 Amazingly enough, w2k3 "remembers" whether the last lock failure on a fnum
259 is the same as this one and changes its error code. I wonder if any
260 app depends on this ?
261 ****************************************************************************/
263 static NTSTATUS brl_lock_failed(files_struct *fsp,
264 const struct lock_struct *lock,
265 bool blocking_lock)
267 if (lock->start >= 0xEF000000 && (lock->start >> 63) == 0) {
268 /* amazing the little things you learn with a test
269 suite. Locks beyond this offset (as a 64 bit
270 number!) always generate the conflict error code,
271 unless the top bit is set */
272 if (!blocking_lock) {
273 fsp->last_lock_failure = *lock;
275 return NT_STATUS_FILE_LOCK_CONFLICT;
278 if (serverid_equal(&lock->context.pid, &fsp->last_lock_failure.context.pid) &&
279 lock->context.tid == fsp->last_lock_failure.context.tid &&
280 lock->fnum == fsp->last_lock_failure.fnum &&
281 lock->start == fsp->last_lock_failure.start) {
282 return NT_STATUS_FILE_LOCK_CONFLICT;
285 if (!blocking_lock) {
286 fsp->last_lock_failure = *lock;
288 return NT_STATUS_LOCK_NOT_GRANTED;
291 /****************************************************************************
292 Open up the brlock.tdb database.
293 ****************************************************************************/
295 void brl_init(bool read_only)
297 int tdb_flags;
299 if (brlock_db) {
300 return;
303 tdb_flags = TDB_DEFAULT|TDB_VOLATILE|TDB_CLEAR_IF_FIRST|TDB_INCOMPATIBLE_HASH;
305 if (!lp_clustering()) {
307 * We can't use the SEQNUM trick to cache brlock
308 * entries in the clustering case because ctdb seqnum
309 * propagation has a delay.
311 tdb_flags |= TDB_SEQNUM;
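/* With TDB_SEQNUM set, brl_get_locks_readonly() can cache the parsed
   lock array on the fsp and only re-read the record when the database
   sequence number changes. */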
314 brlock_db = db_open(NULL, lock_path("brlock.tdb"),
315 lp_open_files_db_hash_size(), tdb_flags,
316 read_only?O_RDONLY:(O_RDWR|O_CREAT), 0644,
317 DBWRAP_LOCK_ORDER_2);
318 if (!brlock_db) {
319 DEBUG(0,("Failed to open byte range locking database %s\n",
320 lock_path("brlock.tdb")));
321 return;
325 /****************************************************************************
326 Close down the brlock.tdb database.
327 ****************************************************************************/
329 void brl_shutdown(void)
331 TALLOC_FREE(brlock_db);
334 #if ZERO_ZERO
335 /****************************************************************************
336 Compare two locks for sorting.
337 ****************************************************************************/
339 static int lock_compare(const struct lock_struct *lck1,
340 const struct lock_struct *lck2)
342 if (lck1->start != lck2->start) {
343 return (lck1->start - lck2->start);
345 if (lck2->size != lck1->size) {
346 return ((int)lck1->size - (int)lck2->size);
348 return 0;
350 #endif
352 /****************************************************************************
353 Lock a range of bytes - Windows lock semantics.
354 ****************************************************************************/
356 NTSTATUS brl_lock_windows_default(struct byte_range_lock *br_lck,
357 struct lock_struct *plock, bool blocking_lock)
359 unsigned int i;
360 files_struct *fsp = br_lck->fsp;
361 struct lock_struct *locks = br_lck->lock_data;
362 NTSTATUS status;
364 SMB_ASSERT(plock->lock_type != UNLOCK_LOCK);
366 if ((plock->start + plock->size - 1 < plock->start) &&
367 plock->size != 0) {
368 return NT_STATUS_INVALID_LOCK_RANGE;
371 for (i=0; i < br_lck->num_locks; i++) {
372 /* Do any Windows or POSIX locks conflict ? */
373 if (brl_conflict(&locks[i], plock)) {
374 /* Remember who blocked us. */
375 plock->context.smblctx = locks[i].context.smblctx;
376 return brl_lock_failed(fsp,plock,blocking_lock);
378 #if ZERO_ZERO
379 if (plock->start == 0 && plock->size == 0 &&
380 locks[i].size == 0) {
381 break;
383 #endif
386 if (!IS_PENDING_LOCK(plock->lock_type)) {
387 contend_level2_oplocks_begin(fsp, LEVEL2_CONTEND_WINDOWS_BRL);
390 /* We can get the Windows lock, now see if it needs to
391 be mapped into a lower level POSIX one, and if so can
392 we get it ? */
394 if (!IS_PENDING_LOCK(plock->lock_type) && lp_posix_locking(fsp->conn->params)) {
395 int errno_ret;
396 if (!set_posix_lock_windows_flavour(fsp,
397 plock->start,
398 plock->size,
399 plock->lock_type,
400 &plock->context,
401 locks,
402 br_lck->num_locks,
403 &errno_ret)) {
405 /* We don't know who blocked us. */
406 plock->context.smblctx = 0xFFFFFFFFFFFFFFFFLL;
408 if (errno_ret == EACCES || errno_ret == EAGAIN) {
409 status = NT_STATUS_FILE_LOCK_CONFLICT;
410 goto fail;
411 } else {
412 status = map_nt_error_from_unix(errno);
413 goto fail;
418 /* no conflicts - add it to the list of locks */
419 locks = (struct lock_struct *)SMB_REALLOC(locks, (br_lck->num_locks + 1) * sizeof(*locks));
420 if (!locks) {
421 status = NT_STATUS_NO_MEMORY;
422 goto fail;
425 memcpy(&locks[br_lck->num_locks], plock, sizeof(struct lock_struct));
426 br_lck->num_locks += 1;
427 br_lck->lock_data = locks;
428 br_lck->modified = True;
430 return NT_STATUS_OK;
431 fail:
432 if (!IS_PENDING_LOCK(plock->lock_type)) {
433 contend_level2_oplocks_end(fsp, LEVEL2_CONTEND_WINDOWS_BRL);
435 return status;
438 /****************************************************************************
439 Cope with POSIX range splits and merges.
440 ****************************************************************************/
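/* Writes zero, one or two entries derived from the existing lock "ex" into
   lck_arr and returns how many were written; the proposed lock "plock" may
   be grown in place when ranges merge. In the complete-overlap case a
   single existing lock produces two entries, which is why callers allocate
   spare slots. */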
442 static unsigned int brlock_posix_split_merge(struct lock_struct *lck_arr, /* Output array. */
443 struct lock_struct *ex, /* existing lock. */
444 struct lock_struct *plock) /* proposed lock. */
446 bool lock_types_differ = (ex->lock_type != plock->lock_type);
448 /* We can't merge non-conflicting locks on different contexts - ignore fnum. */
450 if (!brl_same_context(&ex->context, &plock->context)) {
451 /* Just copy. */
452 memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
453 return 1;
456 /* We now know we have the same context. */
458 /* Did we overlap ? */
460 /*********************************************
461 +---------+
462 | ex |
463 +---------+
464 +-------+
465 | plock |
466 +-------+
467 OR....
468 +---------+
469 | ex |
470 +---------+
471 **********************************************/
473 if ( (ex->start > (plock->start + plock->size)) ||
474 (plock->start > (ex->start + ex->size))) {
476 /* No overlap with this lock - copy existing. */
478 memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
479 return 1;
482 /*********************************************
483 +---------------------------+
484 | ex |
485 +---------------------------+
486 +---------------------------+
487 | plock | -> replace with plock.
488 +---------------------------+
490 +---------------+
491 | ex |
492 +---------------+
493 +---------------------------+
494 | plock | -> replace with plock.
495 +---------------------------+
497 **********************************************/
499 if ( (ex->start >= plock->start) &&
500 (ex->start + ex->size <= plock->start + plock->size) ) {
502 /* Replace - discard existing lock. */
504 return 0;
507 /*********************************************
508 Adjacent after.
509 +-------+
510 | ex |
511 +-------+
512 +---------------+
513 | plock |
514 +---------------+
516 BECOMES....
517 +---------------+-------+
518 | plock | ex | - different lock types.
519 +---------------+-------+
520 OR.... (merge)
521 +-----------------------+
522 | plock | - same lock type.
523 +-----------------------+
524 **********************************************/
526 if (plock->start + plock->size == ex->start) {
528 /* If the lock types are the same, we merge, if different, we
529 add the remainder of the old lock. */
531 if (lock_types_differ) {
532 /* Add existing. */
533 memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
534 return 1;
535 } else {
536 /* Merge - adjust incoming lock as we may have more
537 * merging to come. */
538 plock->size += ex->size;
539 return 0;
543 /*********************************************
544 Adjacent before.
545 +-------+
546 | ex |
547 +-------+
548 +---------------+
549 | plock |
550 +---------------+
551 BECOMES....
552 +-------+---------------+
553 | ex | plock | - different lock types
554 +-------+---------------+
556 OR.... (merge)
557 +-----------------------+
558 | plock | - same lock type.
559 +-----------------------+
561 **********************************************/
563 if (ex->start + ex->size == plock->start) {
565 /* If the lock types are the same, we merge, if different, we
566 add the existing lock. */
568 if (lock_types_differ) {
569 memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
570 return 1;
571 } else {
572 /* Merge - adjust incoming lock as we may have more
573 * merging to come. */
574 plock->start = ex->start;
575 plock->size += ex->size;
576 return 0;
580 /*********************************************
581 Overlap after.
582 +-----------------------+
583 | ex |
584 +-----------------------+
585 +---------------+
586 | plock |
587 +---------------+
589 +----------------+
590 | ex |
591 +----------------+
592 +---------------+
593 | plock |
594 +---------------+
596 BECOMES....
597 +---------------+-------+
598 | plock | ex | - different lock types.
599 +---------------+-------+
600 OR.... (merge)
601 +-----------------------+
602 | plock | - same lock type.
603 +-----------------------+
604 **********************************************/
606 if ( (ex->start >= plock->start) &&
607 (ex->start <= plock->start + plock->size) &&
608 (ex->start + ex->size > plock->start + plock->size) ) {
610 /* If the lock types are the same, we merge, if different, we
611 add the remainder of the old lock. */
613 if (lock_types_differ) {
614 /* Add remaining existing. */
615 memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
616 /* Adjust existing start and size. */
617 lck_arr[0].start = plock->start + plock->size;
618 lck_arr[0].size = (ex->start + ex->size) - (plock->start + plock->size);
619 return 1;
620 } else {
621 /* Merge - adjust incoming lock as we may have more
622 * merging to come. */
623 plock->size += (ex->start + ex->size) - (plock->start + plock->size);
624 return 0;
628 /*********************************************
629 Overlap before.
630 +-----------------------+
631 | ex |
632 +-----------------------+
633 +---------------+
634 | plock |
635 +---------------+
637 +-------------+
638 | ex |
639 +-------------+
640 +---------------+
641 | plock |
642 +---------------+
644 BECOMES....
645 +-------+---------------+
646 | ex | plock | - different lock types
647 +-------+---------------+
649 OR.... (merge)
650 +-----------------------+
651 | plock | - same lock type.
652 +-----------------------+
654 **********************************************/
656 if ( (ex->start < plock->start) &&
657 (ex->start + ex->size >= plock->start) &&
658 (ex->start + ex->size <= plock->start + plock->size) ) {
660 /* If the lock types are the same, we merge, if different, we
661 add the truncated old lock. */
663 if (lock_types_differ) {
664 memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
665 /* Adjust existing size. */
666 lck_arr[0].size = plock->start - ex->start;
667 return 1;
668 } else {
669 /* Merge - adjust incoming lock as we may have more
670 * merging to come. MUST ADJUST plock SIZE FIRST ! */
671 plock->size += (plock->start - ex->start);
672 plock->start = ex->start;
673 return 0;
677 /*********************************************
678 Complete overlap.
679 +---------------------------+
680 | ex |
681 +---------------------------+
682 +---------+
683 | plock |
684 +---------+
685 BECOMES.....
686 +-------+---------+---------+
687 | ex | plock | ex | - different lock types.
688 +-------+---------+---------+
690 +---------------------------+
691 | plock | - same lock type.
692 +---------------------------+
693 **********************************************/
695 if ( (ex->start < plock->start) && (ex->start + ex->size > plock->start + plock->size) ) {
697 if (lock_types_differ) {
699 /* We have to split ex into two locks here. */
701 memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
702 memcpy(&lck_arr[1], ex, sizeof(struct lock_struct));
704 /* Adjust first existing size. */
705 lck_arr[0].size = plock->start - ex->start;
707 /* Adjust second existing start and size. */
708 lck_arr[1].start = plock->start + plock->size;
709 lck_arr[1].size = (ex->start + ex->size) - (plock->start + plock->size);
710 return 2;
711 } else {
712 /* Just eat the existing locks, merge them into plock. */
713 plock->start = ex->start;
714 plock->size = ex->size;
715 return 0;
719 /* Never get here. */
720 smb_panic("brlock_posix_split_merge");
721 /* Notreached. */
723 /* Keep some compilers happy. */
724 return 0;
727 /****************************************************************************
728 Lock a range of bytes - POSIX lock semantics.
729 We must cope with range splits and merges.
730 ****************************************************************************/
732 static NTSTATUS brl_lock_posix(struct messaging_context *msg_ctx,
733 struct byte_range_lock *br_lck,
734 struct lock_struct *plock)
736 unsigned int i, count, posix_count;
737 struct lock_struct *locks = br_lck->lock_data;
738 struct lock_struct *tp;
739 bool signal_pending_read = False;
740 bool break_oplocks = false;
741 NTSTATUS status;
743 /* No zero-zero locks for POSIX. */
744 if (plock->start == 0 && plock->size == 0) {
745 return NT_STATUS_INVALID_PARAMETER;
748 /* Don't allow 64-bit lock wrap. */
749 if (plock->start + plock->size - 1 < plock->start) {
750 return NT_STATUS_INVALID_PARAMETER;
753 /* The worst case scenario here is we have to split an
754 existing POSIX lock range into two, and add our lock,
755 so we need at most 2 more entries. */
757 tp = SMB_MALLOC_ARRAY(struct lock_struct, (br_lck->num_locks + 2));
758 if (!tp) {
759 return NT_STATUS_NO_MEMORY;
762 count = posix_count = 0;
764 for (i=0; i < br_lck->num_locks; i++) {
765 struct lock_struct *curr_lock = &locks[i];
767 /* If we have a pending read lock, a lock downgrade should
768 trigger a lock re-evaluation. */
769 if (curr_lock->lock_type == PENDING_READ_LOCK &&
770 brl_pending_overlap(plock, curr_lock)) {
771 signal_pending_read = True;
774 if (curr_lock->lock_flav == WINDOWS_LOCK) {
775 /* Do any Windows flavour locks conflict ? */
776 if (brl_conflict(curr_lock, plock)) {
777 /* No games with error messages. */
778 SAFE_FREE(tp);
779 /* Remember who blocked us. */
780 plock->context.smblctx = curr_lock->context.smblctx;
781 return NT_STATUS_FILE_LOCK_CONFLICT;
783 /* Just copy the Windows lock into the new array. */
784 memcpy(&tp[count], curr_lock, sizeof(struct lock_struct));
785 count++;
786 } else {
787 unsigned int tmp_count = 0;
789 /* POSIX conflict semantics are different. */
790 if (brl_conflict_posix(curr_lock, plock)) {
791 /* Can't block ourselves with POSIX locks. */
792 /* No games with error messages. */
793 SAFE_FREE(tp);
794 /* Remember who blocked us. */
795 plock->context.smblctx = curr_lock->context.smblctx;
796 return NT_STATUS_FILE_LOCK_CONFLICT;
799 /* Work out overlaps. */
800 tmp_count += brlock_posix_split_merge(&tp[count], curr_lock, plock);
801 posix_count += tmp_count;
802 count += tmp_count;
807 * Break oplocks while we hold a brl. Since lock() and unlock() calls
808 * are not symmetric with POSIX semantics, we cannot guarantee our
809 * contend_level2_oplocks_begin/end calls will be acquired and
810 * released one-for-one as with Windows semantics. Therefore we only
811 * call contend_level2_oplocks_begin if this is the first POSIX brl on
812 * the file.
814 break_oplocks = (!IS_PENDING_LOCK(plock->lock_type) &&
815 posix_count == 0);
816 if (break_oplocks) {
817 contend_level2_oplocks_begin(br_lck->fsp,
818 LEVEL2_CONTEND_POSIX_BRL);
821 /* Try and add the lock in order, sorted by lock start. */
822 for (i=0; i < count; i++) {
823 struct lock_struct *curr_lock = &tp[i];
825 if (curr_lock->start <= plock->start) {
826 continue;
830 if (i < count) {
831 memmove(&tp[i+1], &tp[i],
832 (count - i)*sizeof(struct lock_struct));
834 memcpy(&tp[i], plock, sizeof(struct lock_struct));
835 count++;
837 /* We can get the POSIX lock, now see if it needs to
838 be mapped into a lower level system (fcntl) one, and if so can
839 we get it ? */
841 if (!IS_PENDING_LOCK(plock->lock_type) && lp_posix_locking(br_lck->fsp->conn->params)) {
842 int errno_ret;
844 /* The lower layer just needs to attempt to
845 get the system POSIX lock. We've weeded out
846 any conflicts above. */
848 if (!set_posix_lock_posix_flavour(br_lck->fsp,
849 plock->start,
850 plock->size,
851 plock->lock_type,
852 &errno_ret)) {
854 /* We don't know who blocked us. */
855 plock->context.smblctx = 0xFFFFFFFFFFFFFFFFLL;
857 if (errno_ret == EACCES || errno_ret == EAGAIN) {
858 SAFE_FREE(tp);
859 status = NT_STATUS_FILE_LOCK_CONFLICT;
860 goto fail;
861 } else {
862 SAFE_FREE(tp);
863 status = map_nt_error_from_unix(errno);
864 goto fail;
869 /* If we didn't use all the allocated size,
870 * Realloc so we don't leak entries per lock call. */
871 if (count < br_lck->num_locks + 2) {
872 tp = (struct lock_struct *)SMB_REALLOC(tp, count * sizeof(*locks));
873 if (!tp) {
874 status = NT_STATUS_NO_MEMORY;
875 goto fail;
879 br_lck->num_locks = count;
880 SAFE_FREE(br_lck->lock_data);
881 br_lck->lock_data = tp;
882 locks = tp;
883 br_lck->modified = True;
885 /* A successful downgrade from write to read lock can trigger a lock
886 re-evaluation where waiting readers can now proceed.
888 if (signal_pending_read) {
889 /* Send unlock messages to any pending read waiters that overlap. */
890 for (i=0; i < br_lck->num_locks; i++) {
891 struct lock_struct *pend_lock = &locks[i];
893 /* Ignore non-pending locks. */
894 if (!IS_PENDING_LOCK(pend_lock->lock_type)) {
895 continue;
898 if (pend_lock->lock_type == PENDING_READ_LOCK &&
899 brl_pending_overlap(plock, pend_lock)) {
900 DEBUG(10,("brl_lock_posix: sending unlock message to pid %s\n",
901 procid_str_static(&pend_lock->context.pid )));
903 messaging_send(msg_ctx, pend_lock->context.pid,
904 MSG_SMB_UNLOCK, &data_blob_null);
909 return NT_STATUS_OK;
910 fail:
911 if (break_oplocks) {
912 contend_level2_oplocks_end(br_lck->fsp,
913 LEVEL2_CONTEND_POSIX_BRL);
915 return status;
918 NTSTATUS smb_vfs_call_brl_lock_windows(struct vfs_handle_struct *handle,
919 struct byte_range_lock *br_lck,
920 struct lock_struct *plock,
921 bool blocking_lock,
922 struct blocking_lock_record *blr)
924 VFS_FIND(brl_lock_windows);
925 return handle->fns->brl_lock_windows_fn(handle, br_lck, plock,
926 blocking_lock, blr);
929 /****************************************************************************
930 Lock a range of bytes.
931 ****************************************************************************/
933 NTSTATUS brl_lock(struct messaging_context *msg_ctx,
934 struct byte_range_lock *br_lck,
935 uint64_t smblctx,
936 struct server_id pid,
937 br_off start,
938 br_off size,
939 enum brl_type lock_type,
940 enum brl_flavour lock_flav,
941 bool blocking_lock,
942 uint64_t *psmblctx,
943 struct blocking_lock_record *blr)
945 NTSTATUS ret;
946 struct lock_struct lock;
948 #if !ZERO_ZERO
949 if (start == 0 && size == 0) {
950 DEBUG(0,("client sent 0/0 lock - please report this\n"));
952 #endif
954 #ifdef DEVELOPER
955 /* Quieten valgrind on test. */
956 ZERO_STRUCT(lock);
957 #endif
959 lock.context.smblctx = smblctx;
960 lock.context.pid = pid;
961 lock.context.tid = br_lck->fsp->conn->cnum;
962 lock.start = start;
963 lock.size = size;
964 lock.fnum = br_lck->fsp->fnum;
965 lock.lock_type = lock_type;
966 lock.lock_flav = lock_flav;
968 if (lock_flav == WINDOWS_LOCK) {
969 ret = SMB_VFS_BRL_LOCK_WINDOWS(br_lck->fsp->conn, br_lck,
970 &lock, blocking_lock, blr);
971 } else {
972 ret = brl_lock_posix(msg_ctx, br_lck, &lock);
975 #if ZERO_ZERO
976 /* sort the lock list */
977 TYPESAFE_QSORT(br_lck->lock_data, (size_t)br_lck->num_locks, lock_compare);
978 #endif
980 /* If we're returning an error, return who blocked us. */
981 if (!NT_STATUS_IS_OK(ret) && psmblctx) {
982 *psmblctx = lock.context.smblctx;
984 return ret;
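/* A rough usage sketch (caller-supplied values such as msg_ctx, smblctx,
   pid, start, size and blocker_smblctx are assumed; error handling
   omitted). The talloc destructor flushes and unlocks the record via
   byte_range_lock_flush():

	struct byte_range_lock *br_lck = brl_get_locks(talloc_tos(), fsp);
	NTSTATUS status = brl_lock(msg_ctx, br_lck, smblctx, pid,
				   start, size, WRITE_LOCK, WINDOWS_LOCK,
				   false, &blocker_smblctx, NULL);
	TALLOC_FREE(br_lck);
*/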
987 /****************************************************************************
988 Unlock a range of bytes - Windows semantics.
989 ****************************************************************************/
991 bool brl_unlock_windows_default(struct messaging_context *msg_ctx,
992 struct byte_range_lock *br_lck,
993 const struct lock_struct *plock)
995 unsigned int i, j;
996 struct lock_struct *locks = br_lck->lock_data;
997 enum brl_type deleted_lock_type = READ_LOCK; /* shut the compiler up.... */
999 SMB_ASSERT(plock->lock_type == UNLOCK_LOCK);
1001 #if ZERO_ZERO
1002 /* Delete write locks by preference... The lock list
1003 is sorted in the zero zero case. */
1005 for (i = 0; i < br_lck->num_locks; i++) {
1006 struct lock_struct *lock = &locks[i];
1008 if (lock->lock_type == WRITE_LOCK &&
1009 brl_same_context(&lock->context, &plock->context) &&
1010 lock->fnum == plock->fnum &&
1011 lock->lock_flav == WINDOWS_LOCK &&
1012 lock->start == plock->start &&
1013 lock->size == plock->size) {
1015 /* found it - delete it */
1016 deleted_lock_type = lock->lock_type;
1017 break;
1021 if (i != br_lck->num_locks) {
1022 /* We found it - don't search again. */
1023 goto unlock_continue;
1025 #endif
1027 for (i = 0; i < br_lck->num_locks; i++) {
1028 struct lock_struct *lock = &locks[i];
1030 if (IS_PENDING_LOCK(lock->lock_type)) {
1031 continue;
1034 /* Only remove our own locks that match in start, size, and flavour. */
1035 if (brl_same_context(&lock->context, &plock->context) &&
1036 lock->fnum == plock->fnum &&
1037 lock->lock_flav == WINDOWS_LOCK &&
1038 lock->start == plock->start &&
1039 lock->size == plock->size ) {
1040 deleted_lock_type = lock->lock_type;
1041 break;
1045 if (i == br_lck->num_locks) {
1046 /* we didn't find it */
1047 return False;
1050 #if ZERO_ZERO
1051 unlock_continue:
1052 #endif
1054 /* Actually delete the lock. */
1055 if (i < br_lck->num_locks - 1) {
1056 memmove(&locks[i], &locks[i+1],
1057 sizeof(*locks)*((br_lck->num_locks-1) - i));
1060 br_lck->num_locks -= 1;
1061 br_lck->modified = True;
1063 /* Unlock the underlying POSIX regions. */
1064 if(lp_posix_locking(br_lck->fsp->conn->params)) {
1065 release_posix_lock_windows_flavour(br_lck->fsp,
1066 plock->start,
1067 plock->size,
1068 deleted_lock_type,
1069 &plock->context,
1070 locks,
1071 br_lck->num_locks);
1074 /* Send unlock messages to any pending waiters that overlap. */
1075 for (j=0; j < br_lck->num_locks; j++) {
1076 struct lock_struct *pend_lock = &locks[j];
1078 /* Ignore non-pending locks. */
1079 if (!IS_PENDING_LOCK(pend_lock->lock_type)) {
1080 continue;
1083 /* We could send specific lock info here... */
1084 if (brl_pending_overlap(plock, pend_lock)) {
1085 DEBUG(10,("brl_unlock: sending unlock message to pid %s\n",
1086 procid_str_static(&pend_lock->context.pid )));
1088 messaging_send(msg_ctx, pend_lock->context.pid,
1089 MSG_SMB_UNLOCK, &data_blob_null);
1093 contend_level2_oplocks_end(br_lck->fsp, LEVEL2_CONTEND_WINDOWS_BRL);
1094 return True;
1097 /****************************************************************************
1098 Unlock a range of bytes - POSIX semantics.
1099 ****************************************************************************/
1101 static bool brl_unlock_posix(struct messaging_context *msg_ctx,
1102 struct byte_range_lock *br_lck,
1103 struct lock_struct *plock)
1105 unsigned int i, j, count;
1106 struct lock_struct *tp;
1107 struct lock_struct *locks = br_lck->lock_data;
1108 bool overlap_found = False;
1110 /* No zero-zero locks for POSIX. */
1111 if (plock->start == 0 && plock->size == 0) {
1112 return False;
1115 /* Don't allow 64-bit lock wrap. */
1116 if (plock->start + plock->size < plock->start ||
1117 plock->start + plock->size < plock->size) {
1118 DEBUG(10,("brl_unlock_posix: lock wrap\n"));
1119 return False;
1122 /* The worst case scenario here is we have to split an
1123 existing POSIX lock range into two, so we need at most
1124 1 more entry. */
1126 tp = SMB_MALLOC_ARRAY(struct lock_struct, (br_lck->num_locks + 1));
1127 if (!tp) {
1128 DEBUG(10,("brl_unlock_posix: malloc fail\n"));
1129 return False;
1132 count = 0;
1133 for (i = 0; i < br_lck->num_locks; i++) {
1134 struct lock_struct *lock = &locks[i];
1135 unsigned int tmp_count;
1137 /* Only remove our own locks - ignore fnum. */
1138 if (IS_PENDING_LOCK(lock->lock_type) ||
1139 !brl_same_context(&lock->context, &plock->context)) {
1140 memcpy(&tp[count], lock, sizeof(struct lock_struct));
1141 count++;
1142 continue;
1145 if (lock->lock_flav == WINDOWS_LOCK) {
1146 /* Do any Windows flavour locks conflict ? */
1147 if (brl_conflict(lock, plock)) {
1148 SAFE_FREE(tp);
1149 return false;
1151 /* Just copy the Windows lock into the new array. */
1152 memcpy(&tp[count], lock, sizeof(struct lock_struct));
1153 count++;
1154 continue;
1157 /* Work out overlaps. */
1158 tmp_count = brlock_posix_split_merge(&tp[count], lock, plock);
1160 if (tmp_count == 0) {
1161 /* plock overlapped the existing lock completely,
1162 or replaced it. Don't copy the existing lock. */
1163 overlap_found = true;
1164 } else if (tmp_count == 1) {
1165 /* Either no overlap (simple copy of existing lock) or
1166 * an overlap of an existing lock. */
1167 /* If the lock changed size, we had an overlap. */
1168 if (tp[count].size != lock->size) {
1169 overlap_found = true;
1171 count += tmp_count;
1172 } else if (tmp_count == 2) {
1173 /* We split a lock range in two. */
1174 overlap_found = true;
1175 count += tmp_count;
1177 /* Optimisation... */
1178 /* We know we're finished here as we can't overlap any
1179 more POSIX locks. Copy the rest of the lock array. */
1181 if (i < br_lck->num_locks - 1) {
1182 memcpy(&tp[count], &locks[i+1],
1183 sizeof(*locks)*((br_lck->num_locks-1) - i));
1184 count += ((br_lck->num_locks-1) - i);
1186 break;
1191 if (!overlap_found) {
1192 /* Just ignore - no change. */
1193 SAFE_FREE(tp);
1194 DEBUG(10,("brl_unlock_posix: No overlap - unlocked.\n"));
1195 return True;
1198 /* Unlock any POSIX regions. */
1199 if(lp_posix_locking(br_lck->fsp->conn->params)) {
1200 release_posix_lock_posix_flavour(br_lck->fsp,
1201 plock->start,
1202 plock->size,
1203 &plock->context,
1205 count);
1208 /* Realloc so we don't leak entries per unlock call. */
1209 if (count) {
1210 tp = (struct lock_struct *)SMB_REALLOC(tp, count * sizeof(*locks));
1211 if (!tp) {
1212 DEBUG(10,("brl_unlock_posix: realloc fail\n"));
1213 return False;
1215 } else {
1216 /* We deleted the last lock. */
1217 SAFE_FREE(tp);
1218 tp = NULL;
1221 contend_level2_oplocks_end(br_lck->fsp,
1222 LEVEL2_CONTEND_POSIX_BRL);
1224 br_lck->num_locks = count;
1225 SAFE_FREE(br_lck->lock_data);
1226 locks = tp;
1227 br_lck->lock_data = tp;
1228 br_lck->modified = True;
1230 /* Send unlock messages to any pending waiters that overlap. */
1232 for (j=0; j < br_lck->num_locks; j++) {
1233 struct lock_struct *pend_lock = &locks[j];
1235 /* Ignore non-pending locks. */
1236 if (!IS_PENDING_LOCK(pend_lock->lock_type)) {
1237 continue;
1240 /* We could send specific lock info here... */
1241 if (brl_pending_overlap(plock, pend_lock)) {
1242 DEBUG(10,("brl_unlock: sending unlock message to pid %s\n",
1243 procid_str_static(&pend_lock->context.pid )));
1245 messaging_send(msg_ctx, pend_lock->context.pid,
1246 MSG_SMB_UNLOCK, &data_blob_null);
1250 return True;
1253 bool smb_vfs_call_brl_unlock_windows(struct vfs_handle_struct *handle,
1254 struct messaging_context *msg_ctx,
1255 struct byte_range_lock *br_lck,
1256 const struct lock_struct *plock)
1258 VFS_FIND(brl_unlock_windows);
1259 return handle->fns->brl_unlock_windows_fn(handle, msg_ctx, br_lck,
1260 plock);
1263 /****************************************************************************
1264 Unlock a range of bytes.
1265 ****************************************************************************/
1267 bool brl_unlock(struct messaging_context *msg_ctx,
1268 struct byte_range_lock *br_lck,
1269 uint64_t smblctx,
1270 struct server_id pid,
1271 br_off start,
1272 br_off size,
1273 enum brl_flavour lock_flav)
1275 struct lock_struct lock;
1277 lock.context.smblctx = smblctx;
1278 lock.context.pid = pid;
1279 lock.context.tid = br_lck->fsp->conn->cnum;
1280 lock.start = start;
1281 lock.size = size;
1282 lock.fnum = br_lck->fsp->fnum;
1283 lock.lock_type = UNLOCK_LOCK;
1284 lock.lock_flav = lock_flav;
1286 if (lock_flav == WINDOWS_LOCK) {
1287 return SMB_VFS_BRL_UNLOCK_WINDOWS(br_lck->fsp->conn, msg_ctx,
1288 br_lck, &lock);
1289 } else {
1290 return brl_unlock_posix(msg_ctx, br_lck, &lock);
1294 /****************************************************************************
1295 Test if we could add a lock if we wanted to.
1296 Returns True if the region required is currently unlocked, False if locked.
1297 ****************************************************************************/
1299 bool brl_locktest(struct byte_range_lock *br_lck,
1300 uint64_t smblctx,
1301 struct server_id pid,
1302 br_off start,
1303 br_off size,
1304 enum brl_type lock_type,
1305 enum brl_flavour lock_flav)
1307 bool ret = True;
1308 unsigned int i;
1309 struct lock_struct lock;
1310 const struct lock_struct *locks = br_lck->lock_data;
1311 files_struct *fsp = br_lck->fsp;
1313 lock.context.smblctx = smblctx;
1314 lock.context.pid = pid;
1315 lock.context.tid = br_lck->fsp->conn->cnum;
1316 lock.start = start;
1317 lock.size = size;
1318 lock.fnum = fsp->fnum;
1319 lock.lock_type = lock_type;
1320 lock.lock_flav = lock_flav;
1322 /* Make sure existing locks don't conflict */
1323 for (i=0; i < br_lck->num_locks; i++) {
1325 * Our own locks don't conflict.
1327 if (brl_conflict_other(&locks[i], &lock)) {
1328 return False;
1333 * There is no lock held by an SMB daemon, check to
1334 * see if there is a POSIX lock from a UNIX or NFS process.
1335 * This only conflicts with Windows locks, not POSIX locks.
1338 if(lp_posix_locking(fsp->conn->params) && (lock_flav == WINDOWS_LOCK)) {
1339 ret = is_posix_locked(fsp, &start, &size, &lock_type, WINDOWS_LOCK);
1341 DEBUG(10,("brl_locktest: posix start=%.0f len=%.0f %s for %s file %s\n",
1342 (double)start, (double)size, ret ? "locked" : "unlocked",
1343 fsp_fnum_dbg(fsp), fsp_str_dbg(fsp)));
1345 /* We need to return the inverse of is_posix_locked. */
1346 ret = !ret;
1349 /* no conflicts - we could have added it */
1350 return ret;
1353 /****************************************************************************
1354 Query for existing locks.
1355 ****************************************************************************/
1357 NTSTATUS brl_lockquery(struct byte_range_lock *br_lck,
1358 uint64_t *psmblctx,
1359 struct server_id pid,
1360 br_off *pstart,
1361 br_off *psize,
1362 enum brl_type *plock_type,
1363 enum brl_flavour lock_flav)
1365 unsigned int i;
1366 struct lock_struct lock;
1367 const struct lock_struct *locks = br_lck->lock_data;
1368 files_struct *fsp = br_lck->fsp;
1370 lock.context.smblctx = *psmblctx;
1371 lock.context.pid = pid;
1372 lock.context.tid = br_lck->fsp->conn->cnum;
1373 lock.start = *pstart;
1374 lock.size = *psize;
1375 lock.fnum = fsp->fnum;
1376 lock.lock_type = *plock_type;
1377 lock.lock_flav = lock_flav;
1379 /* Make sure existing locks don't conflict */
1380 for (i=0; i < br_lck->num_locks; i++) {
1381 const struct lock_struct *exlock = &locks[i];
1382 bool conflict = False;
1384 if (exlock->lock_flav == WINDOWS_LOCK) {
1385 conflict = brl_conflict(exlock, &lock);
1386 } else {
1387 conflict = brl_conflict_posix(exlock, &lock);
1390 if (conflict) {
1391 *psmblctx = exlock->context.smblctx;
1392 *pstart = exlock->start;
1393 *psize = exlock->size;
1394 *plock_type = exlock->lock_type;
1395 return NT_STATUS_LOCK_NOT_GRANTED;
1400 * There is no lock held by an SMB daemon, check to
1401 * see if there is a POSIX lock from a UNIX or NFS process.
1404 if(lp_posix_locking(fsp->conn->params)) {
1405 bool ret = is_posix_locked(fsp, pstart, psize, plock_type, POSIX_LOCK);
1407 DEBUG(10,("brl_lockquery: posix start=%.0f len=%.0f %s for %s file %s\n",
1408 (double)*pstart, (double)*psize, ret ? "locked" : "unlocked",
1409 fsp_fnum_dbg(fsp), fsp_str_dbg(fsp)));
1411 if (ret) {
1412 /* Hmmm. No clue what to set smblctx to - use -1. */
1413 *psmblctx = 0xFFFFFFFFFFFFFFFFLL;
1414 return NT_STATUS_LOCK_NOT_GRANTED;
1418 return NT_STATUS_OK;
1422 bool smb_vfs_call_brl_cancel_windows(struct vfs_handle_struct *handle,
1423 struct byte_range_lock *br_lck,
1424 struct lock_struct *plock,
1425 struct blocking_lock_record *blr)
1427 VFS_FIND(brl_cancel_windows);
1428 return handle->fns->brl_cancel_windows_fn(handle, br_lck, plock, blr);
1431 /****************************************************************************
1432 Remove a particular pending lock.
1433 ****************************************************************************/
1434 bool brl_lock_cancel(struct byte_range_lock *br_lck,
1435 uint64_t smblctx,
1436 struct server_id pid,
1437 br_off start,
1438 br_off size,
1439 enum brl_flavour lock_flav,
1440 struct blocking_lock_record *blr)
1442 bool ret;
1443 struct lock_struct lock;
1445 lock.context.smblctx = smblctx;
1446 lock.context.pid = pid;
1447 lock.context.tid = br_lck->fsp->conn->cnum;
1448 lock.start = start;
1449 lock.size = size;
1450 lock.fnum = br_lck->fsp->fnum;
1451 lock.lock_flav = lock_flav;
1452 /* lock.lock_type doesn't matter */
1454 if (lock_flav == WINDOWS_LOCK) {
1455 ret = SMB_VFS_BRL_CANCEL_WINDOWS(br_lck->fsp->conn, br_lck,
1456 &lock, blr);
1457 } else {
1458 ret = brl_lock_cancel_default(br_lck, &lock);
1461 return ret;
1464 bool brl_lock_cancel_default(struct byte_range_lock *br_lck,
1465 struct lock_struct *plock)
1467 unsigned int i;
1468 struct lock_struct *locks = br_lck->lock_data;
1470 SMB_ASSERT(plock);
1472 for (i = 0; i < br_lck->num_locks; i++) {
1473 struct lock_struct *lock = &locks[i];
1475 /* For pending locks we *always* care about the fnum. */
1476 if (brl_same_context(&lock->context, &plock->context) &&
1477 lock->fnum == plock->fnum &&
1478 IS_PENDING_LOCK(lock->lock_type) &&
1479 lock->lock_flav == plock->lock_flav &&
1480 lock->start == plock->start &&
1481 lock->size == plock->size) {
1482 break;
1486 if (i == br_lck->num_locks) {
1487 /* Didn't find it. */
1488 return False;
1491 if (i < br_lck->num_locks - 1) {
1492 /* Found this particular pending lock - delete it */
1493 memmove(&locks[i], &locks[i+1],
1494 sizeof(*locks)*((br_lck->num_locks-1) - i));
1497 br_lck->num_locks -= 1;
1498 br_lck->modified = True;
1499 return True;
1502 /****************************************************************************
1503 Remove any locks associated with an open file.
1504 We unlock every byte range this process still holds on the fd, so the
1505 database entry is cleaned up before the file is finally closed.
1506 ****************************************************************************/
1508 void brl_close_fnum(struct messaging_context *msg_ctx,
1509 struct byte_range_lock *br_lck)
1511 files_struct *fsp = br_lck->fsp;
1512 uint32_t tid = fsp->conn->cnum;
1513 uint64_t fnum = fsp->fnum;
1514 unsigned int i;
1515 struct lock_struct *locks = br_lck->lock_data;
1516 struct server_id pid = messaging_server_id(fsp->conn->sconn->msg_ctx);
1517 struct lock_struct *locks_copy;
1518 unsigned int num_locks_copy;
1520 /* Copy the current lock array. */
1521 if (br_lck->num_locks) {
1522 locks_copy = (struct lock_struct *)talloc_memdup(br_lck, locks, br_lck->num_locks * sizeof(struct lock_struct));
1523 if (!locks_copy) {
1524 smb_panic("brl_close_fnum: talloc failed");
1526 } else {
1527 locks_copy = NULL;
1530 num_locks_copy = br_lck->num_locks;
1532 for (i=0; i < num_locks_copy; i++) {
1533 struct lock_struct *lock = &locks_copy[i];
1535 if (lock->context.tid == tid && serverid_equal(&lock->context.pid, &pid) &&
1536 (lock->fnum == fnum)) {
1537 brl_unlock(msg_ctx,
1538 br_lck,
1539 lock->context.smblctx,
1540 pid,
1541 lock->start,
1542 lock->size,
1543 lock->lock_flav);
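/****************************************************************************
 Mark the brl entries of a disconnected durable handle: the owner is set to
 a disconnected server id and tid/fnum are invalidated, so that
 validate_lock_entries() keeps the entries around (keep_disconnected) until
 brl_reconnect_disconnected() or brl_cleanup_disconnected() deals with them.
 Returns false if any entry does not belong to this durable handle.
****************************************************************************/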
1548 bool brl_mark_disconnected(struct files_struct *fsp)
1550 uint32_t tid = fsp->conn->cnum;
1551 uint64_t smblctx = fsp->op->global->open_persistent_id;
1552 uint64_t fnum = fsp->fnum;
1553 unsigned int i;
1554 struct server_id self = messaging_server_id(fsp->conn->sconn->msg_ctx);
1555 struct byte_range_lock *br_lck = NULL;
1557 if (!fsp->op->global->durable) {
1558 return false;
1561 if (fsp->current_lock_count == 0) {
1562 return true;
1565 br_lck = brl_get_locks(talloc_tos(), fsp);
1566 if (br_lck == NULL) {
1567 return false;
1570 for (i=0; i < br_lck->num_locks; i++) {
1571 struct lock_struct *lock = &br_lck->lock_data[i];
1574 * as this is a durable handle, we only expect locks
1575 * of the current file handle!
1578 if (lock->context.smblctx != smblctx) {
1579 TALLOC_FREE(br_lck);
1580 return false;
1583 if (lock->context.tid != tid) {
1584 TALLOC_FREE(br_lck);
1585 return false;
1588 if (!serverid_equal(&lock->context.pid, &self)) {
1589 TALLOC_FREE(br_lck);
1590 return false;
1593 if (lock->fnum != fnum) {
1594 TALLOC_FREE(br_lck);
1595 return false;
1598 server_id_set_disconnected(&lock->context.pid);
1599 lock->context.tid = TID_FIELD_INVALID;
1600 lock->fnum = FNUM_FIELD_INVALID;
1603 br_lck->modified = true;
1604 TALLOC_FREE(br_lck);
1605 return true;
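/****************************************************************************
 Undo brl_mark_disconnected() on a durable handle reconnect: verify every
 entry still carries the disconnected owner and this open's persistent id,
 then re-attach it to the new server id, tid and fnum.
****************************************************************************/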
1608 bool brl_reconnect_disconnected(struct files_struct *fsp)
1610 uint32_t tid = fsp->conn->cnum;
1611 uint64_t smblctx = fsp->op->global->open_persistent_id;
1612 uint64_t fnum = fsp->fnum;
1613 unsigned int i;
1614 struct server_id self = messaging_server_id(fsp->conn->sconn->msg_ctx);
1615 struct byte_range_lock *br_lck = NULL;
1617 if (!fsp->op->global->durable) {
1618 return false;
1622 * When reconnecting, we do not want to validate the brlock entries
1623 * and thereby remove our own (disconnected) entries but reactivate
1624 * them instead.
1626 fsp->lockdb_clean = true;
1628 br_lck = brl_get_locks(talloc_tos(), fsp);
1629 if (br_lck == NULL) {
1630 return false;
1633 if (br_lck->num_locks == 0) {
1634 TALLOC_FREE(br_lck);
1635 return true;
1638 for (i=0; i < br_lck->num_locks; i++) {
1639 struct lock_struct *lock = &br_lck->lock_data[i];
1642 * as this is a durable handle we only expect locks
1643 * of the current file handle!
1646 if (lock->context.smblctx != smblctx) {
1647 TALLOC_FREE(br_lck);
1648 return false;
1651 if (lock->context.tid != TID_FIELD_INVALID) {
1652 TALLOC_FREE(br_lck);
1653 return false;
1656 if (!server_id_is_disconnected(&lock->context.pid)) {
1657 TALLOC_FREE(br_lck);
1658 return false;
1661 if (lock->fnum != FNUM_FIELD_INVALID) {
1662 TALLOC_FREE(br_lck);
1663 return false;
1666 lock->context.pid = self;
1667 lock->context.tid = tid;
1668 lock->fnum = fnum;
1671 fsp->current_lock_count = br_lck->num_locks;
1672 br_lck->modified = true;
1673 TALLOC_FREE(br_lck);
1674 return true;
1677 /****************************************************************************
1678 Ensure this set of lock entries is valid.
1679 ****************************************************************************/
1680 static bool validate_lock_entries(unsigned int *pnum_entries, struct lock_struct **pplocks,
1681 bool keep_disconnected)
1683 unsigned int i;
1684 unsigned int num_valid_entries = 0;
1685 struct lock_struct *locks = *pplocks;
1686 TALLOC_CTX *frame = talloc_stackframe();
1687 struct server_id *ids;
1688 bool *exists;
1690 ids = talloc_array(frame, struct server_id, *pnum_entries);
1691 if (ids == NULL) {
1692 DEBUG(0, ("validate_lock_entries: "
1693 "talloc_array(struct server_id, %u) failed\n",
1694 *pnum_entries));
1695 talloc_free(frame);
1696 return false;
1699 exists = talloc_array(frame, bool, *pnum_entries);
1700 if (exists == NULL) {
1701 DEBUG(0, ("validate_lock_entries: "
1702 "talloc_array(bool, %u) failed\n",
1703 *pnum_entries));
1704 talloc_free(frame);
1705 return false;
1708 for (i = 0; i < *pnum_entries; i++) {
1709 ids[i] = locks[i].context.pid;
1712 if (!serverids_exist(ids, *pnum_entries, exists)) {
1713 DEBUG(3, ("validate_lock_entries: serverids_exists failed\n"));
1714 talloc_free(frame);
1715 return false;
1718 for (i = 0; i < *pnum_entries; i++) {
1719 if (exists[i]) {
1720 num_valid_entries++;
1721 continue;
1724 if (keep_disconnected &&
1725 server_id_is_disconnected(&ids[i]))
1727 num_valid_entries++;
1728 continue;
1731 /* This process no longer exists - mark this
1732 entry as invalid by zeroing it. */
1733 ZERO_STRUCTP(&locks[i]);
1735 TALLOC_FREE(frame);
1737 if (num_valid_entries != *pnum_entries) {
1738 struct lock_struct *new_lock_data = NULL;
1740 if (num_valid_entries) {
1741 new_lock_data = SMB_MALLOC_ARRAY(struct lock_struct, num_valid_entries);
1742 if (!new_lock_data) {
1743 DEBUG(3, ("malloc fail\n"));
1744 return False;
1747 num_valid_entries = 0;
1748 for (i = 0; i < *pnum_entries; i++) {
1749 struct lock_struct *lock_data = &locks[i];
1750 if (lock_data->context.smblctx &&
1751 lock_data->context.tid) {
1752 /* Valid (nonzero) entry - copy it. */
1753 memcpy(&new_lock_data[num_valid_entries],
1754 lock_data, sizeof(struct lock_struct));
1755 num_valid_entries++;
1760 SAFE_FREE(*pplocks);
1761 *pplocks = new_lock_data;
1762 *pnum_entries = num_valid_entries;
1765 return True;
1768 struct brl_forall_cb {
1769 void (*fn)(struct file_id id, struct server_id pid,
1770 enum brl_type lock_type,
1771 enum brl_flavour lock_flav,
1772 br_off start, br_off size,
1773 void *private_data);
1774 void *private_data;
1777 /****************************************************************************
1778 Traverse the whole database with this function, calling traverse_callback
1779 on each lock.
1780 ****************************************************************************/
1782 static int brl_traverse_fn(struct db_record *rec, void *state)
1784 struct brl_forall_cb *cb = (struct brl_forall_cb *)state;
1785 struct lock_struct *locks;
1786 struct file_id *key;
1787 unsigned int i;
1788 unsigned int num_locks = 0;
1789 unsigned int orig_num_locks = 0;
1790 TDB_DATA dbkey;
1791 TDB_DATA value;
1793 dbkey = dbwrap_record_get_key(rec);
1794 value = dbwrap_record_get_value(rec);
1796 /* In a traverse function we must make a copy of
1797 dbuf before modifying it. */
1799 locks = (struct lock_struct *)memdup(value.dptr, value.dsize);
1800 if (!locks) {
1801 return -1; /* Terminate traversal. */
1804 key = (struct file_id *)dbkey.dptr;
1805 orig_num_locks = num_locks = value.dsize/sizeof(*locks);
1807 /* Ensure the lock db is clean of entries from invalid processes. */
1809 if (!validate_lock_entries(&num_locks, &locks, true)) {
1810 SAFE_FREE(locks);
1811 return -1; /* Terminate traversal */
1814 if (orig_num_locks != num_locks) {
1815 if (num_locks) {
1816 TDB_DATA data;
1817 data.dptr = (uint8_t *)locks;
1818 data.dsize = num_locks*sizeof(struct lock_struct);
1819 dbwrap_record_store(rec, data, TDB_REPLACE);
1820 } else {
1821 dbwrap_record_delete(rec);
1825 if (cb->fn) {
1826 for ( i=0; i<num_locks; i++) {
1827 cb->fn(*key,
1828 locks[i].context.pid,
1829 locks[i].lock_type,
1830 locks[i].lock_flav,
1831 locks[i].start,
1832 locks[i].size,
1833 cb->private_data);
1837 SAFE_FREE(locks);
1838 return 0;
1841 /*******************************************************************
1842 Call the specified function on each lock in the database.
1843 ********************************************************************/
1845 int brl_forall(void (*fn)(struct file_id id, struct server_id pid,
1846 enum brl_type lock_type,
1847 enum brl_flavour lock_flav,
1848 br_off start, br_off size,
1849 void *private_data),
1850 void *private_data)
1852 struct brl_forall_cb cb;
1853 NTSTATUS status;
1854 int count = 0;
1856 if (!brlock_db) {
1857 return 0;
1859 cb.fn = fn;
1860 cb.private_data = private_data;
1861 status = dbwrap_traverse(brlock_db, brl_traverse_fn, &cb, &count);
1863 if (!NT_STATUS_IS_OK(status)) {
1864 return -1;
1865 } else {
1866 return count;
1870 /*******************************************************************
1871 Store a potentially modified set of byte range lock data back into
1872 the database.
1873 Unlock the record.
1874 ********************************************************************/
1876 static void byte_range_lock_flush(struct byte_range_lock *br_lck)
1878 if (br_lck->read_only) {
1879 SMB_ASSERT(!br_lck->modified);
1882 if (!br_lck->modified) {
1883 goto done;
1886 if (br_lck->num_locks == 0) {
1887 /* No locks - delete this entry. */
1888 NTSTATUS status = dbwrap_record_delete(br_lck->record);
1889 if (!NT_STATUS_IS_OK(status)) {
1890 DEBUG(0, ("delete_rec returned %s\n",
1891 nt_errstr(status)));
1892 smb_panic("Could not delete byte range lock entry");
1894 } else {
1895 TDB_DATA data;
1896 NTSTATUS status;
1898 data.dptr = (uint8 *)br_lck->lock_data;
1899 data.dsize = br_lck->num_locks * sizeof(struct lock_struct);
1901 status = dbwrap_record_store(br_lck->record, data, TDB_REPLACE);
1902 if (!NT_STATUS_IS_OK(status)) {
1903 DEBUG(0, ("store returned %s\n", nt_errstr(status)));
1904 smb_panic("Could not store byte range mode entry");
1908 done:
1910 br_lck->read_only = true;
1911 br_lck->modified = false;
1913 TALLOC_FREE(br_lck->record);
1916 static int byte_range_lock_destructor(struct byte_range_lock *br_lck)
1918 byte_range_lock_flush(br_lck);
1919 SAFE_FREE(br_lck->lock_data);
1920 return 0;
1923 /*******************************************************************
1924 Fetch a set of byte range lock data from the database.
1925 Leave the record locked.
1926 TALLOC_FREE(brl) will release the lock in the destructor.
1927 ********************************************************************/
1929 static struct byte_range_lock *brl_get_locks_internal(TALLOC_CTX *mem_ctx,
1930 files_struct *fsp, bool read_only)
1932 TDB_DATA key, data;
1933 struct byte_range_lock *br_lck = talloc(mem_ctx, struct byte_range_lock);
1934 bool do_read_only = read_only;
1936 if (br_lck == NULL) {
1937 return NULL;
1940 br_lck->fsp = fsp;
1941 br_lck->num_locks = 0;
1942 br_lck->modified = False;
1943 br_lck->key = fsp->file_id;
1945 key.dptr = (uint8 *)&br_lck->key;
1946 key.dsize = sizeof(struct file_id);
1948 if (!fsp->lockdb_clean) {
1949 /* We must be read/write to clean
1950 the dead entries. */
1951 do_read_only = false;
1954 if (do_read_only) {
1955 NTSTATUS status;
1956 status = dbwrap_fetch(brlock_db, br_lck, key, &data);
1957 if (!NT_STATUS_IS_OK(status)) {
1958 DEBUG(3, ("Could not fetch byte range lock record\n"));
1959 TALLOC_FREE(br_lck);
1960 return NULL;
1962 br_lck->record = NULL;
1963 } else {
1964 br_lck->record = dbwrap_fetch_locked(brlock_db, br_lck, key);
1966 if (br_lck->record == NULL) {
1967 DEBUG(3, ("Could not lock byte range lock entry\n"));
1968 TALLOC_FREE(br_lck);
1969 return NULL;
1972 data = dbwrap_record_get_value(br_lck->record);
1975 br_lck->read_only = do_read_only;
1976 br_lck->lock_data = NULL;
1978 talloc_set_destructor(br_lck, byte_range_lock_destructor);
1980 br_lck->num_locks = data.dsize / sizeof(struct lock_struct);
1982 if (br_lck->num_locks != 0) {
1983 br_lck->lock_data = SMB_MALLOC_ARRAY(struct lock_struct,
1984 br_lck->num_locks);
1985 if (br_lck->lock_data == NULL) {
1986 DEBUG(0, ("malloc failed\n"));
1987 TALLOC_FREE(br_lck);
1988 return NULL;
1991 memcpy(br_lck->lock_data, data.dptr, data.dsize);
1994 if (!fsp->lockdb_clean) {
1995 int orig_num_locks = br_lck->num_locks;
1998 * This is the first time we access the byte range lock
1999 * record with this fsp. Go through and ensure all entries
2000 * are valid - remove any that don't.
2001 * This makes the lockdb self cleaning at low cost.
2003 * Note: Disconnected entries belong to disconnected
2004 * durable handles. So at this point, we have a new
2005 * handle on the file and the disconnected durable has
2006 * already been closed (we are not a durable reconnect).
2007 * So we need to clean the disconnected brl entry.
2010 if (!validate_lock_entries(&br_lck->num_locks,
2011 &br_lck->lock_data, false)) {
2012 SAFE_FREE(br_lck->lock_data);
2013 TALLOC_FREE(br_lck);
2014 return NULL;
2017 /* Ensure invalid locks are cleaned up in the destructor. */
2018 if (orig_num_locks != br_lck->num_locks) {
2019 br_lck->modified = True;
2022 /* Mark the lockdb as "clean" as seen from this open file. */
2023 fsp->lockdb_clean = True;
2026 if (DEBUGLEVEL >= 10) {
2027 unsigned int i;
2028 struct lock_struct *locks = br_lck->lock_data;
2029 DEBUG(10,("brl_get_locks_internal: %u current locks on file_id %s\n",
2030 br_lck->num_locks,
2031 file_id_string_tos(&fsp->file_id)));
2032 for( i = 0; i < br_lck->num_locks; i++) {
2033 print_lock_struct(i, &locks[i]);
2037 if (do_read_only != read_only) {
2039 * this stores the record and gets rid of
2040 * the write lock that is needed for a cleanup
2042 byte_range_lock_flush(br_lck);
2045 return br_lck;
2048 struct byte_range_lock *brl_get_locks(TALLOC_CTX *mem_ctx,
2049 files_struct *fsp)
2051 return brl_get_locks_internal(mem_ctx, fsp, False);
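/****************************************************************************
 Get a read-only view of the byte range locks. Outside of clustering the
 parsed lock array is cached on the fsp and reused as long as the
 brlock.tdb sequence number is unchanged (see TDB_SEQNUM in brl_init()).
****************************************************************************/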
2054 struct byte_range_lock *brl_get_locks_readonly(files_struct *fsp)
2056 struct byte_range_lock *br_lock;
2058 if (lp_clustering()) {
2059 return brl_get_locks_internal(talloc_tos(), fsp, true);
2062 if ((fsp->brlock_rec != NULL)
2063 && (dbwrap_get_seqnum(brlock_db) == fsp->brlock_seqnum)) {
2064 return fsp->brlock_rec;
2067 TALLOC_FREE(fsp->brlock_rec);
2069 br_lock = brl_get_locks_internal(talloc_tos(), fsp, true);
2070 if (br_lock == NULL) {
2071 return NULL;
2073 fsp->brlock_seqnum = dbwrap_get_seqnum(brlock_db);
2075 fsp->brlock_rec = talloc_move(fsp, &br_lock);
2077 return fsp->brlock_rec;
2080 struct brl_revalidate_state {
2081 ssize_t array_size;
2082 uint32 num_pids;
2083 struct server_id *pids;
2087 * Collect PIDs of all processes with pending entries
2090 static void brl_revalidate_collect(struct file_id id, struct server_id pid,
2091 enum brl_type lock_type,
2092 enum brl_flavour lock_flav,
2093 br_off start, br_off size,
2094 void *private_data)
2096 struct brl_revalidate_state *state =
2097 (struct brl_revalidate_state *)private_data;
2099 if (!IS_PENDING_LOCK(lock_type)) {
2100 return;
2103 add_to_large_array(state, sizeof(pid), (void *)&pid,
2104 &state->pids, &state->num_pids,
2105 &state->array_size);
2109 * qsort callback to sort the processes
2112 static int compare_procids(const void *p1, const void *p2)
2114 const struct server_id *i1 = (const struct server_id *)p1;
2115 const struct server_id *i2 = (const struct server_id *)p2;
2117 if (i1->pid < i2->pid) return -1;
2118 if (i1->pid > i2->pid) return 1;
2119 return 0;
2123 * Send a MSG_SMB_UNLOCK message to all processes with pending byte range
2124 * locks so that they retry. Mainly used in the cluster code after a node has
2125 * died.
2127 * Done in two steps to avoid double-sends: First we collect all entries in an
2128 * array, then qsort that array and only send to non-dupes.
2131 void brl_revalidate(struct messaging_context *msg_ctx,
2132 void *private_data,
2133 uint32_t msg_type,
2134 struct server_id server_id,
2135 DATA_BLOB *data)
2137 struct brl_revalidate_state *state;
2138 uint32 i;
2139 struct server_id last_pid;
2141 if (!(state = talloc_zero(NULL, struct brl_revalidate_state))) {
2142 DEBUG(0, ("talloc failed\n"));
2143 return;
2146 brl_forall(brl_revalidate_collect, state);
2148 if (state->array_size == -1) {
2149 DEBUG(0, ("talloc failed\n"));
2150 goto done;
2153 if (state->num_pids == 0) {
2154 goto done;
2157 TYPESAFE_QSORT(state->pids, state->num_pids, compare_procids);
2159 ZERO_STRUCT(last_pid);
2161 for (i=0; i<state->num_pids; i++) {
2162 if (serverid_equal(&last_pid, &state->pids[i])) {
2164 * We've seen that one already
2166 continue;
2169 messaging_send(msg_ctx, state->pids[i], MSG_SMB_UNLOCK,
2170 &data_blob_null);
2171 last_pid = state->pids[i];
2174 done:
2175 TALLOC_FREE(state);
2176 return;
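/****************************************************************************
 Remove all brl entries for a file that belong to a given disconnected
 persistent open. If any entry is still owned by a live server or by a
 different open, nothing is deleted and false is returned.
****************************************************************************/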
2179 bool brl_cleanup_disconnected(struct file_id fid, uint64_t open_persistent_id)
2181 bool ret = false;
2182 TALLOC_CTX *frame = talloc_stackframe();
2183 TDB_DATA key, val;
2184 struct db_record *rec;
2185 struct lock_struct *lock;
2186 unsigned n, num;
2187 NTSTATUS status;
2189 key = make_tdb_data((void*)&fid, sizeof(fid));
2191 rec = dbwrap_fetch_locked(brlock_db, frame, key);
2192 if (rec == NULL) {
2193 DEBUG(5, ("brl_cleanup_disconnected: failed to fetch record "
2194 "for file %s\n", file_id_string(frame, &fid)));
2195 goto done;
2198 val = dbwrap_record_get_value(rec);
2199 lock = (struct lock_struct*)val.dptr;
2200 num = val.dsize / sizeof(struct lock_struct);
2201 if (lock == NULL) {
2202 DEBUG(10, ("brl_cleanup_disconnected: no byte range locks for "
2203 "file %s\n", file_id_string(frame, &fid)));
2204 ret = true;
2205 goto done;
2208 for (n=0; n<num; n++) {
2209 struct lock_context *ctx = &lock[n].context;
2211 if (!server_id_is_disconnected(&ctx->pid)) {
2212 DEBUG(5, ("brl_cleanup_disconnected: byte range lock "
2213 "%s used by server %s, do not cleanup\n",
2214 file_id_string(frame, &fid),
2215 server_id_str(frame, &ctx->pid)));
2216 goto done;
2219 if (ctx->smblctx != open_persistent_id) {
2220 DEBUG(5, ("brl_cleanup_disconnected: byte range lock "
2221 "%s expected smblctx %llu but found %llu"
2222 ", do not cleanup\n",
2223 file_id_string(frame, &fid),
2224 (unsigned long long)open_persistent_id,
2225 (unsigned long long)ctx->smblctx));
2226 goto done;
2230 status = dbwrap_record_delete(rec);
2231 if (!NT_STATUS_IS_OK(status)) {
2232 DEBUG(5, ("brl_cleanup_disconnected: failed to delete record "
2233 "for file %s from %s, open %llu: %s\n",
2234 file_id_string(frame, &fid), dbwrap_name(brlock_db),
2235 (unsigned long long)open_persistent_id,
2236 nt_errstr(status)));
2237 goto done;
2240 DEBUG(10, ("brl_cleanup_disconnected: "
2241 "file %s cleaned up %u entries from open %llu\n",
2242 file_id_string(frame, &fid), num,
2243 (unsigned long long)open_persistent_id));
2245 ret = true;
2246 done:
2247 talloc_free(frame);
2248 return ret;