source3/locking/posix.c (Samba)
1 /*
2 Unix SMB/CIFS implementation.
3 Locking functions
4 Copyright (C) Jeremy Allison 1992-2006
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <http://www.gnu.org/licenses/>.
19 Revision History:
21 POSIX locking support. Jeremy Allison (jeremy@valinux.com), Apr. 2000.
24 #include "includes.h"
25 #include "system/filesys.h"
26 #include "lib/util/server_id.h"
27 #include "locking/proto.h"
28 #include "dbwrap/dbwrap.h"
29 #include "dbwrap/dbwrap_rbt.h"
30 #include "util_tdb.h"
32 #undef DBGC_CLASS
33 #define DBGC_CLASS DBGC_LOCKING
36 * The pending close database handle.
39 static struct db_context *posix_pending_close_db;
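/*
 * This single in-memory (rbt) database holds three kinds of records:
 *  - lock ref counts, keyed by struct lock_ref_count_key (file_id + 'r'),
 *  - arrays of fds pending close, keyed by the bare file_id,
 *  - per-smblctx markers for POSIX-flavour locks, keyed by the raw smblctx.
 */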
41 /****************************************************************************
42 First - the functions that deal with the underlying system locks - these
43 functions are used no matter if we're mapping CIFS Windows locks or CIFS
44 POSIX locks onto POSIX.
45 ****************************************************************************/
47 /****************************************************************************
48 Utility function to map a lock type correctly depending on the open
49 mode of a file.
50 ****************************************************************************/
52 static int map_posix_lock_type( files_struct *fsp, enum brl_type lock_type)
54 if ((lock_type == WRITE_LOCK) && !fsp->fsp_flags.can_write) {
56 * Many UNIXes cannot get a write lock on a file opened read-only.
57 * Win32 locking semantics allow this.
58 * Do the best we can and attempt a read-only lock.
60 DEBUG(10,("map_posix_lock_type: Downgrading write lock to read due to read-only file.\n"));
61 return F_RDLCK;
65 * This return should be the most normal, as we attempt
66 * to always open files read/write.
69 return (lock_type == READ_LOCK) ? F_RDLCK : F_WRLCK;
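/*
 * For example, a WRITE_LOCK requested against an fsp that was opened
 * read-only is issued to the kernel as F_RDLCK: the caller still gets a
 * lock, which is as close to the Win32 behaviour as fcntl() allows.
 */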
72 /****************************************************************************
73 Debugging aid :-).
74 ****************************************************************************/
76 static const char *posix_lock_type_name(int lock_type)
78 return (lock_type == F_RDLCK) ? "READ" : "WRITE";
81 /****************************************************************************
82 Check to see if the given unsigned lock range is within the possible POSIX
83 range. Modifies the given args to be in range if possible, just returns
84 False if not.
85 ****************************************************************************/
87 #define SMB_OFF_T_BITS (sizeof(off_t)*8)
89 static bool posix_lock_in_range(off_t *offset_out, off_t *count_out,
90 uint64_t u_offset, uint64_t u_count)
92 off_t offset = (off_t)u_offset;
93 off_t count = (off_t)u_count;
96 * For the type of system we are, attempt to
97 * find the maximum positive lock offset as an off_t.
100 #if defined(MAX_POSITIVE_LOCK_OFFSET) /* Some systems have arbitrary limits. */
102 off_t max_positive_lock_offset = (MAX_POSITIVE_LOCK_OFFSET);
103 #else
105 * In this case off_t is 64 bits,
106 * and the underlying system can handle 64 bit signed locks.
109 off_t mask2 = ((off_t)0x4) << (SMB_OFF_T_BITS-4);
110 off_t mask = (mask2<<1);
111 off_t max_positive_lock_offset = ~mask;
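/*
 * With a 64 bit off_t this evaluates to:
 *   mask2 = 0x4000000000000000, mask = 0x8000000000000000,
 *   max_positive_lock_offset = 0x7FFFFFFFFFFFFFFF,
 * i.e. the largest positive off_t, built without needing a 64 bit literal.
 */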
113 #endif
115 * POSIX locks of length zero mean lock to end-of-file.
116 * Win32 locks of length zero are point probes. Ignore
117 * any Win32 locks of length zero. JRA.
120 if (count == 0) {
121 DEBUG(10,("posix_lock_in_range: count = 0, ignoring.\n"));
122 return False;
126 * If the given offset is > max_positive_lock_offset then we cannot map this at all, so
127 * ignore this lock.
130 if (u_offset & ~((uint64_t)max_positive_lock_offset)) {
131 DEBUG(10, ("posix_lock_in_range: (offset = %ju) offset > %ju "
132 "and we cannot handle this. Ignoring lock.\n",
133 (uintmax_t)u_offset,
134 (uintmax_t)max_positive_lock_offset));
135 return False;
139 * We must truncate the count to less than max_positive_lock_offset.
142 if (u_count & ~((uint64_t)max_positive_lock_offset)) {
143 count = max_positive_lock_offset;
147 * Truncate count to end at max lock offset.
150 if (offset > INT64_MAX - count ||
151 offset + count > max_positive_lock_offset) {
152 count = max_positive_lock_offset - offset;
156 * If we ate all the count, ignore this lock.
159 if (count == 0) {
160 DEBUG(10, ("posix_lock_in_range: Count = 0. Ignoring lock "
161 "u_offset = %ju, u_count = %ju\n",
162 (uintmax_t)u_offset,
163 (uintmax_t)u_count));
164 return False;
168 * The mapping was successful.
171 DEBUG(10, ("posix_lock_in_range: offset_out = %ju, "
172 "count_out = %ju\n",
173 (uintmax_t)offset, (uintmax_t)count));
175 *offset_out = offset;
176 *count_out = count;
178 return True;
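/*
 * Worked example (64 bit off_t): u_offset = 0x7FFFFFFFFFFFFFF0 with
 * u_count = 0x100 passes the offset check but would run past
 * max_positive_lock_offset, so the count is clamped to 0xF and the call
 * succeeds; a request at u_offset = 0x8000000000000000 cannot be
 * represented at all and is ignored (returns False).
 */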
181 bool smb_vfs_call_lock(struct vfs_handle_struct *handle,
182 struct files_struct *fsp, int op, off_t offset,
183 off_t count, int type)
185 VFS_FIND(lock);
186 return handle->fns->lock_fn(handle, fsp, op, offset, count, type);
189 /****************************************************************************
190 Actual function that does POSIX locks. Copes with 64 -> 32 bit cruft and
191 broken NFS implementations.
192 ****************************************************************************/
194 static bool posix_fcntl_lock(files_struct *fsp, int op, off_t offset, off_t count, int type)
196 bool ret;
198 DEBUG(8,("posix_fcntl_lock %d %d %jd %jd %d\n",
199 fsp->fh->fd,op,(intmax_t)offset,(intmax_t)count,type));
201 ret = SMB_VFS_LOCK(fsp, op, offset, count, type);
203 if (!ret && ((errno == EFBIG) || (errno == ENOLCK) || (errno == EINVAL))) {
205 if ((errno == EINVAL) &&
206 (op != F_GETLK &&
207 op != F_SETLK &&
208 op != F_SETLKW)) {
209 DEBUG(0,("WARNING: OFD locks in use and no kernel "
210 "support. Try setting "
211 "'smbd:force process locks = true' "
212 "in smb.conf\n"));
213 } else {
214 DEBUG(0, ("WARNING: lock request at offset "
215 "%ju, length %ju returned\n",
216 (uintmax_t)offset, (uintmax_t)count));
217 DEBUGADD(0, ("an %s error. This can happen when using 64 bit "
218 "lock offsets\n", strerror(errno)));
219 DEBUGADD(0, ("on 32 bit NFS mounted file systems.\n"));
223 * If the offset is > 0x7FFFFFFF then this will cause problems on
224 * 32 bit NFS mounted filesystems. Just ignore it.
227 if (offset & ~((off_t)0x7fffffff)) {
228 DEBUG(0,("Offset greater than 31 bits. Returning success.\n"));
229 return True;
232 if (count & ~((off_t)0x7fffffff)) {
233 /* 32 bit NFS file system, retry with the count truncated to 31 bits. */
234 DEBUG(0,("Count greater than 31 bits - retrying with 31 bit truncated length.\n"));
235 errno = 0;
236 count &= 0x7fffffff;
237 ret = SMB_VFS_LOCK(fsp, op, offset, count, type);
241 DEBUG(8,("posix_fcntl_lock: Lock call %s\n", ret ? "successful" : "failed"));
242 return ret;
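/*
 * Note that when fcntl() fails with EFBIG/ENOLCK/EINVAL and the offset
 * needs more than 31 bits, this function reports success without any lock
 * actually being held, following the "just ignore it" policy above for
 * 32 bit NFS mounts.
 */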
245 bool smb_vfs_call_getlock(struct vfs_handle_struct *handle,
246 struct files_struct *fsp, off_t *poffset,
247 off_t *pcount, int *ptype, pid_t *ppid)
249 VFS_FIND(getlock);
250 return handle->fns->getlock_fn(handle, fsp, poffset, pcount, ptype,
251 ppid);
254 /****************************************************************************
255 Actual function that gets POSIX locks. Copes with 64 -> 32 bit cruft and
256 broken NFS implementations.
257 ****************************************************************************/
259 static bool posix_fcntl_getlock(files_struct *fsp, off_t *poffset, off_t *pcount, int *ptype)
261 pid_t pid;
262 bool ret;
264 DEBUG(8, ("posix_fcntl_getlock %d %ju %ju %d\n",
265 fsp->fh->fd, (uintmax_t)*poffset, (uintmax_t)*pcount,
266 *ptype));
268 ret = SMB_VFS_GETLOCK(fsp, poffset, pcount, ptype, &pid);
270 if (!ret && ((errno == EFBIG) || (errno == ENOLCK) || (errno == EINVAL))) {
272 DEBUG(0, ("posix_fcntl_getlock: WARNING: lock request at "
273 "offset %ju, length %ju returned\n",
274 (uintmax_t)*poffset, (uintmax_t)*pcount));
275 DEBUGADD(0, ("an %s error. This can happen when using 64 bit "
276 "lock offsets\n", strerror(errno)));
277 DEBUGADD(0, ("on 32 bit NFS mounted file systems.\n"));
280 * If the offset is > 0x7FFFFFFF then this will cause problems on
281 * 32 bit NFS mounted filesystems. Just ignore it.
284 if (*poffset & ~((off_t)0x7fffffff)) {
285 DEBUG(0,("Offset greater than 31 bits. Returning success.\n"));
286 return True;
289 if (*pcount & ~((off_t)0x7fffffff)) {
290 /* 32 bit NFS file system, retry with the count truncated to 31 bits. */
291 DEBUG(0,("Count greater than 31 bits - retrying with 31 bit truncated length.\n"));
292 errno = 0;
293 *pcount &= 0x7fffffff;
294 ret = SMB_VFS_GETLOCK(fsp,poffset,pcount,ptype,&pid);
298 DEBUG(8,("posix_fcntl_getlock: Lock query call %s\n", ret ? "successful" : "failed"));
299 return ret;
302 /****************************************************************************
303 POSIX function to see if a file region is locked. Returns True if the
304 region is locked, False otherwise.
305 ****************************************************************************/
307 bool is_posix_locked(files_struct *fsp,
308 uint64_t *pu_offset,
309 uint64_t *pu_count,
310 enum brl_type *plock_type,
311 enum brl_flavour lock_flav)
313 off_t offset;
314 off_t count;
315 int posix_lock_type = map_posix_lock_type(fsp,*plock_type);
317 DEBUG(10, ("is_posix_locked: File %s, offset = %ju, count = %ju, "
318 "type = %s\n", fsp_str_dbg(fsp), (uintmax_t)*pu_offset,
319 (uintmax_t)*pu_count, posix_lock_type_name(*plock_type)));
322 * If the requested lock won't fit in the POSIX range, we will
323 * never set it, so presume it is not locked.
326 if(!posix_lock_in_range(&offset, &count, *pu_offset, *pu_count)) {
327 return False;
330 if (!posix_fcntl_getlock(fsp,&offset,&count,&posix_lock_type)) {
331 return False;
334 if (posix_lock_type == F_UNLCK) {
335 return False;
338 if (lock_flav == POSIX_LOCK) {
339 /* Only POSIX lock queries need to know the details. */
340 *pu_offset = (uint64_t)offset;
341 *pu_count = (uint64_t)count;
342 *plock_type = (posix_lock_type == F_RDLCK) ? READ_LOCK : WRITE_LOCK;
344 return True;
347 /****************************************************************************
348 Next - the functions that deal with in memory database storing representations
349 of either Windows CIFS locks or POSIX CIFS locks.
350 ****************************************************************************/
352 /* The key used in the in-memory POSIX databases. */
354 struct lock_ref_count_key {
355 struct file_id id;
356 char r;
359 /*******************************************************************
360 Form a static locking key for a dev/inode pair for the lock ref count
361 ******************************************************************/
363 static TDB_DATA locking_ref_count_key_fsp(const files_struct *fsp,
364 struct lock_ref_count_key *tmp)
366 ZERO_STRUCTP(tmp);
367 tmp->id = fsp->file_id;
368 tmp->r = 'r';
369 return make_tdb_data((uint8_t *)tmp, sizeof(*tmp));
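/*
 * The trailing 'r' keeps ref-count records distinct from the fd-array
 * records keyed by the bare file_id (see fd_array_key_fsp() below); both
 * record types live in the same posix_pending_close_db.
 */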
372 /*******************************************************************
373 Convenience function to get an fd_array key from an fsp.
374 ******************************************************************/
376 static TDB_DATA fd_array_key_fsp(const files_struct *fsp)
378 return make_tdb_data((const uint8_t *)&fsp->file_id, sizeof(fsp->file_id));
381 /*******************************************************************
382 Create the in-memory POSIX lock databases.
383 ********************************************************************/
385 bool posix_locking_init(bool read_only)
387 if (posix_pending_close_db != NULL) {
388 return true;
391 posix_pending_close_db = db_open_rbt(NULL);
393 if (posix_pending_close_db == NULL) {
394 DEBUG(0,("Failed to open POSIX pending close database.\n"));
395 return false;
398 return true;
401 /*******************************************************************
402 Delete the in-memory POSIX lock databases.
403 ********************************************************************/
405 bool posix_locking_end(void)
408 * Shouldn't we close all fd's here?
410 TALLOC_FREE(posix_pending_close_db);
411 return true;
414 /****************************************************************************
415 Next - the functions that deal with reference count of number of locks open
416 on a dev/ino pair.
417 ****************************************************************************/
419 /****************************************************************************
420 Increase the lock ref count. Creates lock_ref_count entry if it doesn't exist.
421 ****************************************************************************/
423 static void increment_lock_ref_count(const files_struct *fsp)
425 struct lock_ref_count_key tmp;
426 int32_t lock_ref_count = 0;
427 NTSTATUS status;
429 status = dbwrap_change_int32_atomic(
430 posix_pending_close_db, locking_ref_count_key_fsp(fsp, &tmp),
431 &lock_ref_count, 1);
433 SMB_ASSERT(NT_STATUS_IS_OK(status));
434 SMB_ASSERT(lock_ref_count < INT32_MAX);
436 DEBUG(10,("lock_ref_count for file %s = %d\n",
437 fsp_str_dbg(fsp), (int)(lock_ref_count + 1)));
440 /****************************************************************************
441 Reduce the lock ref count.
442 ****************************************************************************/
444 static void decrement_lock_ref_count(const files_struct *fsp)
446 struct lock_ref_count_key tmp;
447 int32_t lock_ref_count = 0;
448 NTSTATUS status;
450 status = dbwrap_change_int32_atomic(
451 posix_pending_close_db, locking_ref_count_key_fsp(fsp, &tmp),
452 &lock_ref_count, -1);
454 SMB_ASSERT(NT_STATUS_IS_OK(status));
455 SMB_ASSERT(lock_ref_count > 0);
457 DEBUG(10,("lock_ref_count for file %s = %d\n",
458 fsp_str_dbg(fsp), (int)(lock_ref_count - 1)));
461 /****************************************************************************
462 Fetch the lock ref count.
463 ****************************************************************************/
465 static int32_t get_lock_ref_count(const files_struct *fsp)
467 struct lock_ref_count_key tmp;
468 NTSTATUS status;
469 int32_t lock_ref_count = 0;
471 status = dbwrap_fetch_int32(
472 posix_pending_close_db, locking_ref_count_key_fsp(fsp, &tmp),
473 &lock_ref_count);
475 if (!NT_STATUS_IS_OK(status) &&
476 !NT_STATUS_EQUAL(status, NT_STATUS_NOT_FOUND)) {
477 DEBUG(0, ("Error fetching "
478 "lock ref count for file %s: %s\n",
479 fsp_str_dbg(fsp), nt_errstr(status)));
481 return lock_ref_count;
484 /****************************************************************************
485 Delete a lock_ref_count entry.
486 ****************************************************************************/
488 static void delete_lock_ref_count(const files_struct *fsp)
490 struct lock_ref_count_key tmp;
492 /* Not a bug if it doesn't exist - no locks were ever granted. */
494 dbwrap_delete(posix_pending_close_db,
495 locking_ref_count_key_fsp(fsp, &tmp));
497 DEBUG(10,("delete_lock_ref_count for file %s\n",
498 fsp_str_dbg(fsp)));
501 /****************************************************************************
502 Next - the functions that deal with storing fd's that have outstanding
503 POSIX locks when closed.
504 ****************************************************************************/
506 /****************************************************************************
507 The records in posix_pending_close_db are composed of an array of
508 ints keyed by dev/ino pair. Those ints are the fd's that were open on
509 this dev/ino pair that should have been closed, but couldn't be, as the lock
510 ref count is non-zero.
511 ****************************************************************************/
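/*
 * A minimal sketch of such a record, assuming fds 12 and 27 are pending
 * close for one dev/ino pair:
 *
 *   key   : the struct file_id of the file
 *   value : { 12, 27 }   (2 * sizeof(int) bytes)
 *
 * add_fd_to_close_entry() appends one int to the value, fd_close_posix_fn()
 * walks the array, closes each fd and then deletes the record.
 */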
513 struct add_fd_to_close_entry_state {
514 const struct files_struct *fsp;
517 static void add_fd_to_close_entry_fn(
518 struct db_record *rec,
519 TDB_DATA value,
520 void *private_data)
522 struct add_fd_to_close_entry_state *state = private_data;
523 TDB_DATA values[] = {
524 value,
525 { .dptr = (uint8_t *)&(state->fsp->fh->fd),
526 .dsize = sizeof(state->fsp->fh->fd) },
528 NTSTATUS status;
530 SMB_ASSERT((values[0].dsize % sizeof(int)) == 0);
532 status = dbwrap_record_storev(rec, values, ARRAY_SIZE(values), 0);
533 SMB_ASSERT(NT_STATUS_IS_OK(status));
536 /****************************************************************************
537 Add an fd to the pending close db.
538 ****************************************************************************/
540 static void add_fd_to_close_entry(const files_struct *fsp)
542 struct add_fd_to_close_entry_state state = { .fsp = fsp };
543 NTSTATUS status;
545 status = dbwrap_do_locked(
546 posix_pending_close_db,
547 fd_array_key_fsp(fsp),
548 add_fd_to_close_entry_fn,
549 &state);
550 SMB_ASSERT(NT_STATUS_IS_OK(status));
552 DBG_DEBUG("added fd %d file %s\n",
553 fsp->fh->fd,
554 fsp_str_dbg(fsp));
557 static void fd_close_posix_fn(
558 struct db_record *rec,
559 TDB_DATA data,
560 void *private_data)
562 size_t num_fds, i;
564 SMB_ASSERT((data.dsize % sizeof(int)) == 0);
565 num_fds = data.dsize / sizeof(int);
567 for (i=0; i<num_fds; i++) {
568 int fd;
569 memcpy(&fd, data.dptr, sizeof(int));
570 close(fd);
571 data.dptr += sizeof(int);
573 dbwrap_record_delete(rec);
576 /****************************************************************************
577 Deal with pending closes needed by POSIX locking support.
578 Note that locking_close_file() is expected to have been called
579 to delete all locks on this fsp before this function is called.
580 ****************************************************************************/
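/*
 * Background: with classic (non-OFD) POSIX fcntl locks, closing any fd
 * that refers to a file drops all of this process's locks on that file,
 * including locks taken through other fds. That is why an fd whose
 * dev/ino pair still has a non-zero lock ref count is parked in
 * posix_pending_close_db below rather than closed immediately.
 */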
582 int fd_close_posix(const struct files_struct *fsp)
584 NTSTATUS status;
586 if (!lp_locking(fsp->conn->params) ||
587 !lp_posix_locking(fsp->conn->params) ||
588 fsp->fsp_flags.use_ofd_locks)
591 * No locking or POSIX to worry about or we are using POSIX
592 * open file description lock semantics which only removes
593 * locks on the file descriptor we're closing. Just close.
595 return close(fsp->fh->fd);
598 if (get_lock_ref_count(fsp)) {
601 * There are outstanding locks on this dev/inode pair on
602 * other fds. Add our fd to the pending close db. We also
603 * set fsp->fh->fd to -1 inside fd_close() after returning
604 * from VFS layer.
607 add_fd_to_close_entry(fsp);
608 return 0;
611 status = dbwrap_do_locked(
612 posix_pending_close_db,
613 fd_array_key_fsp(fsp),
614 fd_close_posix_fn,
615 NULL);
616 if (!NT_STATUS_IS_OK(status)) {
617 DBG_WARNING("dbwrap_do_locked failed: %s\n",
618 nt_errstr(status));
621 /* Don't need a lock ref count on this dev/ino anymore. */
622 delete_lock_ref_count(fsp);
625 * Finally close the fd associated with this fsp.
628 return close(fsp->fh->fd);
631 /****************************************************************************
632 Next - the functions that deal with the mapping CIFS Windows locks onto
633 the underlying system POSIX locks.
634 ****************************************************************************/
637 * Structure used when splitting a lock range
638 * into a POSIX lock range. Doubly linked list.
641 struct lock_list {
642 struct lock_list *next;
643 struct lock_list *prev;
644 off_t start;
645 off_t size;
648 /****************************************************************************
649 Create a list of lock ranges that don't overlap a given range. Used in calculating
650 POSIX locks and unlocks. This is a difficult function that requires ASCII art to
651 understand it :-).
652 ****************************************************************************/
654 static struct lock_list *posix_lock_list(TALLOC_CTX *ctx,
655 struct lock_list *lhead,
656 const struct lock_context *lock_ctx, /* Lock context lhead belongs to. */
657 const struct lock_struct *plocks,
658 int num_locks)
660 int i;
663 * Check the current lock list on this dev/inode pair.
664 * Quit if the list is deleted.
667 DEBUG(10, ("posix_lock_list: curr: start=%ju,size=%ju\n",
668 (uintmax_t)lhead->start, (uintmax_t)lhead->size ));
670 for (i=0; i<num_locks && lhead; i++) {
671 const struct lock_struct *lock = &plocks[i];
672 struct lock_list *l_curr;
674 /* Ignore all but read/write locks. */
675 if (lock->lock_type != READ_LOCK && lock->lock_type != WRITE_LOCK) {
676 continue;
679 /* Ignore locks not owned by this process. */
680 if (!server_id_equal(&lock->context.pid, &lock_ctx->pid)) {
681 continue;
685 * Walk the lock list, checking for overlaps. Note that
686 * the lock list can expand within this loop if the current
687 * range being examined needs to be split.
690 for (l_curr = lhead; l_curr;) {
692 DEBUG(10, ("posix_lock_list: lock: fnum=%ju: "
693 "start=%ju,size=%ju:type=%s",
694 (uintmax_t)lock->fnum,
695 (uintmax_t)lock->start,
696 (uintmax_t)lock->size,
697 posix_lock_type_name(lock->lock_type) ));
699 if ( (l_curr->start >= (lock->start + lock->size)) ||
700 (lock->start >= (l_curr->start + l_curr->size))) {
702 /* No overlap with existing lock - leave this range alone. */
703 /*********************************************
704 +---------+
705 | l_curr |
706 +---------+
707 +-------+
708 | lock |
709 +-------+
710 OR....
711 +---------+
712 | l_curr |
713 +---------+
714 **********************************************/
716 DEBUG(10,(" no overlap case.\n" ));
718 l_curr = l_curr->next;
720 } else if ( (l_curr->start >= lock->start) &&
721 (l_curr->start + l_curr->size <= lock->start + lock->size) ) {
724 * This range is completely overlapped by this existing lock range
725 * and thus should have no effect. Delete it from the list.
727 /*********************************************
728 +---------+
729 | l_curr |
730 +---------+
731 +---------------------------+
732 | lock |
733 +---------------------------+
734 **********************************************/
735 /* Save the next pointer */
736 struct lock_list *ul_next = l_curr->next;
738 DEBUG(10,(" delete case.\n" ));
740 DLIST_REMOVE(lhead, l_curr);
741 if(lhead == NULL) {
742 break; /* No more list... */
745 l_curr = ul_next;
747 } else if ( (l_curr->start >= lock->start) &&
748 (l_curr->start < lock->start + lock->size) &&
749 (l_curr->start + l_curr->size > lock->start + lock->size) ) {
752 * This range overlaps the existing lock range at the high end.
753 * Truncate by moving start to existing range end and reducing size.
755 /*********************************************
756 +---------------+
757 | l_curr |
758 +---------------+
759 +---------------+
760 | lock |
761 +---------------+
762 BECOMES....
763 +-------+
764 | l_curr|
765 +-------+
766 **********************************************/
768 l_curr->size = (l_curr->start + l_curr->size) - (lock->start + lock->size);
769 l_curr->start = lock->start + lock->size;
771 DEBUG(10, (" truncate high case: start=%ju,"
772 "size=%ju\n",
773 (uintmax_t)l_curr->start,
774 (uintmax_t)l_curr->size ));
776 l_curr = l_curr->next;
778 } else if ( (l_curr->start < lock->start) &&
779 (l_curr->start + l_curr->size > lock->start) &&
780 (l_curr->start + l_curr->size <= lock->start + lock->size) ) {
783 * This range overlaps the existing lock range at the low end.
784 * Truncate by reducing size.
786 /*********************************************
787 +---------------+
788 | l_curr |
789 +---------------+
790 +---------------+
791 | lock |
792 +---------------+
793 BECOMES....
794 +-------+
795 | l_curr|
796 +-------+
797 **********************************************/
799 l_curr->size = lock->start - l_curr->start;
801 DEBUG(10, (" truncate low case: start=%ju,"
802 "size=%ju\n",
803 (uintmax_t)l_curr->start,
804 (uintmax_t)l_curr->size ));
806 l_curr = l_curr->next;
808 } else if ( (l_curr->start < lock->start) &&
809 (l_curr->start + l_curr->size > lock->start + lock->size) ) {
811 * Worst case scenario. Range completely overlaps an existing
812 * lock range. Split the request into two, push the new (upper) request
813 * into the dlink list, and continue with the entry after l_new (as we
814 * know that l_new will not overlap with this lock).
816 /*********************************************
817 +---------------------------+
818 | l_curr |
819 +---------------------------+
820 +---------+
821 | lock |
822 +---------+
823 BECOMES.....
824 +-------+ +---------+
825 | l_curr| | l_new |
826 +-------+ +---------+
827 **********************************************/
828 struct lock_list *l_new = talloc(ctx, struct lock_list);
830 if(l_new == NULL) {
831 DEBUG(0,("posix_lock_list: talloc fail.\n"));
832 return NULL; /* The talloc_destroy takes care of cleanup. */
835 ZERO_STRUCTP(l_new);
836 l_new->start = lock->start + lock->size;
837 l_new->size = l_curr->start + l_curr->size - l_new->start;
839 /* Truncate the l_curr. */
840 l_curr->size = lock->start - l_curr->start;
842 DEBUG(10, (" split case: curr: start=%ju,"
843 "size=%ju new: start=%ju,"
844 "size=%ju\n",
845 (uintmax_t)l_curr->start,
846 (uintmax_t)l_curr->size,
847 (uintmax_t)l_new->start,
848 (uintmax_t)l_new->size ));
851 * Add into the dlink list after the l_curr point - NOT at lhead.
853 DLIST_ADD_AFTER(lhead, l_new, l_curr);
855 /* And move after the link we added. */
856 l_curr = l_new->next;
858 } else {
861 * This logic case should never happen. Ensure this is the
862 * case by forcing an abort.... Remove in production.
864 char *msg = NULL;
866 if (asprintf(&msg, "logic flaw in cases: "
867 "l_curr: start = %ju, "
868 "size = %ju : lock: "
869 "start = %ju, size = %ju",
870 (uintmax_t)l_curr->start,
871 (uintmax_t)l_curr->size,
872 (uintmax_t)lock->start,
873 (uintmax_t)lock->size ) != -1) {
874 smb_panic(msg);
875 } else {
876 smb_panic("posix_lock_list");
879 } /* end for ( l_curr = lhead; l_curr;) */
880 } /* end for (i=0; i<num_locks && lhead; i++) */
882 return lhead;
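/*
 * Worked example: lhead enters as the single range [0, 30) and plocks
 * records another lock held by this process covering [10, 30). The
 * "truncate low" case shrinks the entry to [0, 10), so only the part of
 * the range not covered by the existing lock is handed back to the caller.
 */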
885 /****************************************************************************
886 POSIX function to acquire a lock. Returns True if the
887 lock could be granted, False if not.
888 ****************************************************************************/
890 bool set_posix_lock_windows_flavour(files_struct *fsp,
891 uint64_t u_offset,
892 uint64_t u_count,
893 enum brl_type lock_type,
894 const struct lock_context *lock_ctx,
895 const struct lock_struct *plocks,
896 int num_locks,
897 int *errno_ret)
899 off_t offset;
900 off_t count;
901 int posix_lock_type = map_posix_lock_type(fsp,lock_type);
902 bool ret = True;
903 size_t lock_count;
904 TALLOC_CTX *l_ctx = NULL;
905 struct lock_list *llist = NULL;
906 struct lock_list *ll = NULL;
908 DEBUG(5, ("set_posix_lock_windows_flavour: File %s, offset = %ju, "
909 "count = %ju, type = %s\n", fsp_str_dbg(fsp),
910 (uintmax_t)u_offset, (uintmax_t)u_count,
911 posix_lock_type_name(lock_type)));
914 * If the requested lock won't fit in the POSIX range, we will
915 * pretend it was successful.
918 if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
919 increment_lock_ref_count(fsp);
920 return True;
924 * Windows is very strange. It allows read locks to be overlaid
925 * (even over a write lock), but leaves the write lock in force until the first
926 * unlock. It also reference counts the locks. This means the following sequence:
928 * process1                                          process2
929 * ------------------------------------------------------------------------
930 * WRITE LOCK : start = 2, len = 10
931 *                                                   READ LOCK: start =0, len = 10 - FAIL
932 * READ LOCK : start = 0, len = 14
933 *                                                   READ LOCK: start =0, len = 10 - FAIL
934 * UNLOCK : start = 2, len = 10
935 *                                                   READ LOCK: start =0, len = 10 - OK
937 * Under POSIX, the same sequence in steps 1 and 2 would not be reference counted, but
938 * would leave a single read lock over the 0-14 region.
941 if ((l_ctx = talloc_init("set_posix_lock")) == NULL) {
942 DEBUG(0,("set_posix_lock_windows_flavour: unable to init talloc context.\n"));
943 return False;
946 if ((ll = talloc(l_ctx, struct lock_list)) == NULL) {
947 DEBUG(0,("set_posix_lock_windows_flavour: unable to talloc unlock list.\n"));
948 talloc_destroy(l_ctx);
949 return False;
953 * Create the initial list entry containing the
954 * lock we want to add.
957 ZERO_STRUCTP(ll);
958 ll->start = offset;
959 ll->size = count;
961 DLIST_ADD(llist, ll);
964 * The following call calculates if there are any
965 * overlapping locks held by this process on
966 * fd's open on the same file and splits this list
967 * into a list of lock ranges that do not overlap with existing
968 * POSIX locks.
971 llist = posix_lock_list(l_ctx,
972 llist,
973 lock_ctx, /* Lock context llist belongs to. */
974 plocks,
975 num_locks);
978 * Add the POSIX locks on the list of ranges returned.
979 * As the lock is supposed to be added atomically, we need to
980 * back out all the locks if any one of these calls fail.
983 for (lock_count = 0, ll = llist; ll; ll = ll->next, lock_count++) {
984 offset = ll->start;
985 count = ll->size;
987 DEBUG(5, ("set_posix_lock_windows_flavour: Real lock: "
988 "Type = %s: offset = %ju, count = %ju\n",
989 posix_lock_type_name(posix_lock_type),
990 (uintmax_t)offset, (uintmax_t)count ));
992 if (!posix_fcntl_lock(fsp,F_SETLK,offset,count,posix_lock_type)) {
993 *errno_ret = errno;
994 DEBUG(5, ("set_posix_lock_windows_flavour: Lock "
995 "fail !: Type = %s: offset = %ju, "
996 "count = %ju. Errno = %s\n",
997 posix_lock_type_name(posix_lock_type),
998 (uintmax_t)offset, (uintmax_t)count,
999 strerror(errno) ));
1000 ret = False;
1001 break;
1005 if (!ret) {
1008 * Back out all the POSIX locks we have on fail.
1011 for (ll = llist; lock_count; ll = ll->next, lock_count--) {
1012 offset = ll->start;
1013 count = ll->size;
1015 DEBUG(5, ("set_posix_lock_windows_flavour: Backing "
1016 "out locks: Type = %s: offset = %ju, "
1017 "count = %ju\n",
1018 posix_lock_type_name(posix_lock_type),
1019 (uintmax_t)offset, (uintmax_t)count ));
1021 posix_fcntl_lock(fsp,F_SETLK,offset,count,F_UNLCK);
1023 } else {
1024 /* Remember the number of locks we have on this dev/ino pair. */
1025 increment_lock_ref_count(fsp);
1028 talloc_destroy(l_ctx);
1029 return ret;
1032 /****************************************************************************
1033 POSIX function to release a lock. Returns True if the
1034 lock could be released, False if not.
1035 ****************************************************************************/
1037 bool release_posix_lock_windows_flavour(files_struct *fsp,
1038 uint64_t u_offset,
1039 uint64_t u_count,
1040 enum brl_type deleted_lock_type,
1041 const struct lock_context *lock_ctx,
1042 const struct lock_struct *plocks,
1043 int num_locks)
1045 off_t offset;
1046 off_t count;
1047 bool ret = True;
1048 TALLOC_CTX *ul_ctx = NULL;
1049 struct lock_list *ulist = NULL;
1050 struct lock_list *ul = NULL;
1052 DEBUG(5, ("release_posix_lock_windows_flavour: File %s, offset = %ju, "
1053 "count = %ju\n", fsp_str_dbg(fsp),
1054 (uintmax_t)u_offset, (uintmax_t)u_count));
1056 /* Remember the number of locks we have on this dev/ino pair. */
1057 decrement_lock_ref_count(fsp);
1060 * If the requested lock won't fit in the POSIX range, we will
1061 * pretend it was successful.
1064 if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
1065 return True;
1068 if ((ul_ctx = talloc_init("release_posix_lock")) == NULL) {
1069 DEBUG(0,("release_posix_lock_windows_flavour: unable to init talloc context.\n"));
1070 return False;
1073 if ((ul = talloc(ul_ctx, struct lock_list)) == NULL) {
1074 DEBUG(0,("release_posix_lock_windows_flavour: unable to talloc unlock list.\n"));
1075 talloc_destroy(ul_ctx);
1076 return False;
1080 * Create the initial list entry containing the
1081 * lock we want to remove.
1084 ZERO_STRUCTP(ul);
1085 ul->start = offset;
1086 ul->size = count;
1088 DLIST_ADD(ulist, ul);
1091 * The following call calculates if there are any
1092 * overlapping locks held by this process on
1093 * fd's open on the same file and creates a
1094 * list of unlock ranges that will allow
1095 * POSIX lock ranges to remain on the file whilst the
1096 * unlocks are performed.
1099 ulist = posix_lock_list(ul_ctx,
1100 ulist,
1101 lock_ctx, /* Lock context ulist belongs to. */
1102 plocks,
1103 num_locks);
1106 * If there were any overlapped entries (list is > 1 or size or start have changed),
1107 * and the lock_type we just deleted from
1108 * the upper layer tdb was a write lock, then before doing the unlock we need to downgrade
1109 * the POSIX lock to a read lock. This allows any overlapping read locks
1110 * to be atomically maintained.
1113 if (deleted_lock_type == WRITE_LOCK &&
1114 (!ulist || ulist->next != NULL || ulist->start != offset || ulist->size != count)) {
1116 DEBUG(5, ("release_posix_lock_windows_flavour: downgrading "
1117 "lock to READ: offset = %ju, count = %ju\n",
1118 (uintmax_t)offset, (uintmax_t)count ));
1120 if (!posix_fcntl_lock(fsp,F_SETLK,offset,count,F_RDLCK)) {
1121 DEBUG(0,("release_posix_lock_windows_flavour: downgrade of lock failed with error %s !\n", strerror(errno) ));
1122 talloc_destroy(ul_ctx);
1123 return False;
1128 * Release the POSIX locks on the list of ranges returned.
1131 for(; ulist; ulist = ulist->next) {
1132 offset = ulist->start;
1133 count = ulist->size;
1135 DEBUG(5, ("release_posix_lock_windows_flavour: Real unlock: "
1136 "offset = %ju, count = %ju\n",
1137 (uintmax_t)offset, (uintmax_t)count ));
1139 if (!posix_fcntl_lock(fsp,F_SETLK,offset,count,F_UNLCK)) {
1140 ret = False;
1144 talloc_destroy(ul_ctx);
1145 return ret;
1148 /****************************************************************************
1149 Next - the functions that deal with mapping CIFS POSIX locks onto
1150 the underlying system POSIX locks.
1151 ****************************************************************************/
1153 /****************************************************************************
1154 We only increment the lock ref count when we see a POSIX lock on a context
1155 that doesn't already have one.
1156 ****************************************************************************/
1158 static void increment_posix_lock_count(const files_struct *fsp,
1159 uint64_t smblctx)
1161 NTSTATUS status;
1162 TDB_DATA ctx_key;
1163 TDB_DATA val = { 0 };
1165 ctx_key.dptr = (uint8_t *)&smblctx;
1166 ctx_key.dsize = sizeof(smblctx);
1169 * Don't increment if we already have any POSIX flavor
1170 * locks on this context.
1172 if (dbwrap_exists(posix_pending_close_db, ctx_key)) {
1173 return;
1176 /* Remember that we have POSIX flavor locks on this context. */
1177 status = dbwrap_store(posix_pending_close_db, ctx_key, val, 0);
1178 SMB_ASSERT(NT_STATUS_IS_OK(status));
1180 increment_lock_ref_count(fsp);
1182 DEBUG(10,("posix_locks set for file %s\n",
1183 fsp_str_dbg(fsp)));
1186 static void decrement_posix_lock_count(const files_struct *fsp, uint64_t smblctx)
1188 NTSTATUS status;
1189 TDB_DATA ctx_key;
1191 ctx_key.dptr = (uint8_t *)&smblctx;
1192 ctx_key.dsize = sizeof(smblctx);
1194 status = dbwrap_delete(posix_pending_close_db, ctx_key);
1195 SMB_ASSERT(NT_STATUS_IS_OK(status));
1197 decrement_lock_ref_count(fsp);
1199 DEBUG(10,("posix_locks deleted for file %s\n",
1200 fsp_str_dbg(fsp)));
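/*
 * Together these two helpers make the dev/ino lock ref count track the
 * number of lock contexts (smblctx values) holding POSIX-flavour locks
 * rather than the number of individual ranges: the marker record keyed by
 * the raw smblctx ensures each context is counted exactly once.
 */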
1203 /****************************************************************************
1204 Return true if any locks exist on the given lock context.
1205 ****************************************************************************/
1207 static bool locks_exist_on_context(const struct lock_struct *plocks,
1208 int num_locks,
1209 const struct lock_context *lock_ctx)
1211 int i;
1213 for (i=0; i < num_locks; i++) {
1214 const struct lock_struct *lock = &plocks[i];
1216 /* Ignore all but read/write locks. */
1217 if (lock->lock_type != READ_LOCK && lock->lock_type != WRITE_LOCK) {
1218 continue;
1221 /* Ignore locks not owned by this process. */
1222 if (!server_id_equal(&lock->context.pid, &lock_ctx->pid)) {
1223 continue;
1226 if (lock_ctx->smblctx == lock->context.smblctx) {
1227 return true;
1230 return false;
1233 /****************************************************************************
1234 POSIX function to acquire a lock. Returns True if the
1235 lock could be granted, False if not.
1236 As POSIX locks don't stack or conflict (they just overwrite)
1237 we can map the requested lock directly onto a system one. We
1238 know it doesn't conflict with locks on other contexts as the
1239 upper layer would have refused it.
1240 ****************************************************************************/
1242 bool set_posix_lock_posix_flavour(files_struct *fsp,
1243 uint64_t u_offset,
1244 uint64_t u_count,
1245 enum brl_type lock_type,
1246 const struct lock_context *lock_ctx,
1247 int *errno_ret)
1249 off_t offset;
1250 off_t count;
1251 int posix_lock_type = map_posix_lock_type(fsp,lock_type);
1253 DEBUG(5,("set_posix_lock_posix_flavour: File %s, offset = %ju, count "
1254 "= %ju, type = %s\n", fsp_str_dbg(fsp),
1255 (uintmax_t)u_offset, (uintmax_t)u_count,
1256 posix_lock_type_name(lock_type)));
1259 * If the requested lock won't fit in the POSIX range, we will
1260 * pretend it was successful.
1263 if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
1264 increment_posix_lock_count(fsp, lock_ctx->smblctx);
1265 return True;
1268 if (!posix_fcntl_lock(fsp,F_SETLK,offset,count,posix_lock_type)) {
1269 *errno_ret = errno;
1270 DEBUG(5,("set_posix_lock_posix_flavour: Lock fail !: Type = %s: offset = %ju, count = %ju. Errno = %s\n",
1271 posix_lock_type_name(posix_lock_type), (uintmax_t)offset, (uintmax_t)count, strerror(errno) ));
1272 return False;
1274 increment_posix_lock_count(fsp, lock_ctx->smblctx);
1275 return True;
1278 /****************************************************************************
1279 POSIX function to release a lock. Returns True if the
1280 lock could be released, False if not.
1281 We are given a complete lock state from the upper layer which is what the lock
1282 state should be after the unlock has already been done, so what
1283 we do is punch out holes in the unlock range where locks owned by this process
1284 have a different lock context.
1285 ****************************************************************************/
1287 bool release_posix_lock_posix_flavour(files_struct *fsp,
1288 uint64_t u_offset,
1289 uint64_t u_count,
1290 const struct lock_context *lock_ctx,
1291 const struct lock_struct *plocks,
1292 int num_locks)
1294 bool ret = True;
1295 off_t offset;
1296 off_t count;
1297 TALLOC_CTX *ul_ctx = NULL;
1298 struct lock_list *ulist = NULL;
1299 struct lock_list *ul = NULL;
1301 DEBUG(5, ("release_posix_lock_posix_flavour: File %s, offset = %ju, "
1302 "count = %ju\n", fsp_str_dbg(fsp),
1303 (uintmax_t)u_offset, (uintmax_t)u_count));
1306 * If the requested lock won't fit in the POSIX range, we will
1307 * pretend it was successful.
1310 if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
1311 if (!locks_exist_on_context(plocks, num_locks, lock_ctx)) {
1312 decrement_posix_lock_count(fsp, lock_ctx->smblctx);
1314 return True;
1317 if ((ul_ctx = talloc_init("release_posix_lock")) == NULL) {
1318 DEBUG(0,("release_posix_lock_posix_flavour: unable to init talloc context.\n"));
1319 return False;
1322 if ((ul = talloc(ul_ctx, struct lock_list)) == NULL) {
1323 DEBUG(0,("release_posix_lock_posix_flavour: unable to talloc unlock list.\n"));
1324 talloc_destroy(ul_ctx);
1325 return False;
1329 * Create the initial list entry containing the
1330 * lock we want to remove.
1333 ZERO_STRUCTP(ul);
1334 ul->start = offset;
1335 ul->size = count;
1337 DLIST_ADD(ulist, ul);
1340 * Walk the given array creating a linked list
1341 * of unlock requests.
1344 ulist = posix_lock_list(ul_ctx,
1345 ulist,
1346 lock_ctx, /* Lock context ulist belongs to. */
1347 plocks,
1348 num_locks);
1351 * Release the POSIX locks on the list of ranges returned.
1354 for(; ulist; ulist = ulist->next) {
1355 offset = ulist->start;
1356 count = ulist->size;
1358 DEBUG(5, ("release_posix_lock_posix_flavour: Real unlock: "
1359 "offset = %ju, count = %ju\n",
1360 (uintmax_t)offset, (uintmax_t)count ));
1362 if (!posix_fcntl_lock(fsp,F_SETLK,offset,count,F_UNLCK)) {
1363 ret = False;
1367 if (!locks_exist_on_context(plocks, num_locks, lock_ctx)) {
1368 decrement_posix_lock_count(fsp, lock_ctx->smblctx);
1370 talloc_destroy(ul_ctx);
1371 return ret;