source3/locking/posix.c
/*
   Unix SMB/CIFS implementation.
   Locking functions
   Copyright (C) Jeremy Allison 1992-2006

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.

   Revision History:

   POSIX locking support. Jeremy Allison (jeremy@valinux.com), Apr. 2000.
*/
#include "includes.h"
#include "system/filesys.h"
#include "locking/proto.h"
#include "dbwrap/dbwrap.h"
#include "dbwrap/dbwrap_rbt.h"
#include "util_tdb.h"

#undef DBGC_CLASS
#define DBGC_CLASS DBGC_LOCKING

/*
 * The pending close database handle.
 */

static struct db_context *posix_pending_close_db;
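/*
 * This in-memory database holds two kinds of records, both keyed by the
 * file's dev/inode pair:
 *
 *  - a Windows lock reference count (see struct lock_ref_count_key below),
 *    counting how many Windows byte-range locks are outstanding on the
 *    dev/inode pair across all fd's this process has open on it.
 *
 *  - an array of fd's whose close had to be deferred because closing them
 *    would drop POSIX locks still needed on the file (see fd_close_posix()
 *    below).
 */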
/****************************************************************************
 First - the functions that deal with the underlying system locks - these
 functions are used no matter if we're mapping CIFS Windows locks or CIFS
 POSIX locks onto POSIX.
****************************************************************************/

/****************************************************************************
 Utility function to map a lock type correctly depending on the open
 mode of a file.
****************************************************************************/
static int map_posix_lock_type( files_struct *fsp, enum brl_type lock_type)
{
        if((lock_type == WRITE_LOCK) && !fsp->can_write) {
                /*
                 * Many UNIX's cannot get a write lock on a file opened read-only.
                 * Win32 locking semantics allow this.
                 * Do the best we can and attempt a read-only lock.
                 */
                DEBUG(10,("map_posix_lock_type: Downgrading write lock to read due to read-only file.\n"));
                return F_RDLCK;
        }

        /*
         * This return should be the most normal, as we attempt
         * to always open files read/write.
         */

        return (lock_type == READ_LOCK) ? F_RDLCK : F_WRLCK;
}
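/*
 * For example, a WRITE_LOCK requested on an fsp that was opened read-only
 * (fsp->can_write == false) is mapped to F_RDLCK rather than F_WRLCK so
 * the fcntl() call has a chance of succeeding; a READ_LOCK always maps to
 * F_RDLCK.
 */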
/****************************************************************************
 Debugging aid :-).
****************************************************************************/

static const char *posix_lock_type_name(int lock_type)
{
        return (lock_type == F_RDLCK) ? "READ" : "WRITE";
}
/****************************************************************************
 Check to see if the given unsigned lock range is within the possible POSIX
 range. Modifies the given args to be in range if possible, just returns
 False if not.
****************************************************************************/

static bool posix_lock_in_range(off_t *offset_out, off_t *count_out,
                                uint64_t u_offset, uint64_t u_count)
{
        off_t offset = (off_t)u_offset;
        off_t count = (off_t)u_count;

        /*
         * For the type of system we are, attempt to
         * find the maximum positive lock offset as an off_t.
         */

#if defined(MAX_POSITIVE_LOCK_OFFSET) /* Some systems have arbitrary limits. */

        off_t max_positive_lock_offset = (MAX_POSITIVE_LOCK_OFFSET);

#else
        /*
         * In this case off_t is 64 bits,
         * and the underlying system can handle 64 bit signed locks.
         */

        off_t mask2 = ((off_t)0x4) << (SMB_OFF_T_BITS-4);
        off_t mask = (mask2<<1);
        off_t max_positive_lock_offset = ~mask;

#endif
        /*
         * POSIX locks of length zero mean lock to end-of-file.
         * Win32 locks of length zero are point probes. Ignore
         * any Win32 locks of length zero. JRA.
         */

        if (count == (off_t)0) {
                DEBUG(10,("posix_lock_in_range: count = 0, ignoring.\n"));
                return False;
        }

        /*
         * If the given offset was > max_positive_lock_offset then we cannot map this at all
         * ignore this lock.
         */

        if (u_offset & ~((uint64_t)max_positive_lock_offset)) {
                DEBUG(10,("posix_lock_in_range: (offset = %.0f) offset > %.0f and we cannot handle this. Ignoring lock.\n",
                        (double)u_offset, (double)((uint64_t)max_positive_lock_offset) ));
                return False;
        }

        /*
         * We must truncate the count to less than max_positive_lock_offset.
         */

        if (u_count & ~((uint64_t)max_positive_lock_offset)) {
                count = max_positive_lock_offset;
        }

        /*
         * Truncate count to end at max lock offset.
         */

        if (offset + count < 0 || offset + count > max_positive_lock_offset) {
                count = max_positive_lock_offset - offset;
        }

        /*
         * If we ate all the count, ignore this lock.
         */

        if (count == 0) {
                DEBUG(10,("posix_lock_in_range: Count = 0. Ignoring lock u_offset = %.0f, u_count = %.0f\n",
                        (double)u_offset, (double)u_count ));
                return False;
        }

        /*
         * The mapping was successful.
         */

        DEBUG(10,("posix_lock_in_range: offset_out = %.0f, count_out = %.0f\n",
                (double)offset, (double)count ));

        *offset_out = offset;
        *count_out = count;

        return True;
}
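/*
 * Example: with a 64 bit off_t, a request of u_offset = 0 and
 * u_count = 0xFFFFFFFFFFFFFFFF is clamped so that the count ends at
 * max_positive_lock_offset, while a request whose offset already lies
 * beyond max_positive_lock_offset cannot be mapped at all and is rejected
 * (False is returned and the callers below simply ignore the lock).
 */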
bool smb_vfs_call_lock(struct vfs_handle_struct *handle,
                       struct files_struct *fsp, int op, off_t offset,
                       off_t count, int type)
{
        VFS_FIND(lock);
        return handle->fns->lock_fn(handle, fsp, op, offset, count, type);
}
/****************************************************************************
 Actual function that does POSIX locks. Copes with 64 -> 32 bit cruft and
 broken NFS implementations.
****************************************************************************/

static bool posix_fcntl_lock(files_struct *fsp, int op, off_t offset, off_t count, int type)
{
        bool ret;

        DEBUG(8,("posix_fcntl_lock %d %d %.0f %.0f %d\n",fsp->fh->fd,op,(double)offset,(double)count,type));

        ret = SMB_VFS_LOCK(fsp, op, offset, count, type);

        if (!ret && ((errno == EFBIG) || (errno == ENOLCK) || (errno == EINVAL))) {

                DEBUG(0,("posix_fcntl_lock: WARNING: lock request at offset %.0f, length %.0f returned\n",
                        (double)offset,(double)count));
                DEBUGADD(0,("an %s error. This can happen when using 64 bit lock offsets\n", strerror(errno)));
                DEBUGADD(0,("on 32 bit NFS mounted file systems.\n"));

                /*
                 * If the offset is > 0x7FFFFFFF then this will cause problems on
                 * 32 bit NFS mounted filesystems. Just ignore it.
                 */

                if (offset & ~((off_t)0x7fffffff)) {
                        DEBUG(0,("Offset greater than 31 bits. Returning success.\n"));
                        return True;
                }

                if (count & ~((off_t)0x7fffffff)) {
                        /* 32 bit NFS file system, retry with smaller offset */
                        DEBUG(0,("Count greater than 31 bits - retrying with 31 bit truncated length.\n"));
                        errno = 0;
                        count &= 0x7fffffff;
                        ret = SMB_VFS_LOCK(fsp, op, offset, count, type);
                }
        }

        DEBUG(8,("posix_fcntl_lock: Lock call %s\n", ret ? "successful" : "failed"));
        return ret;
}
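/*
 * Summary of the recovery strategy above: if the lock call fails with
 * EFBIG, ENOLCK or EINVAL (symptoms of a 32 bit NFS mount), an offset
 * beyond 31 bits is simply reported as success, and a length beyond
 * 31 bits is retried truncated to 31 bits.
 */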
bool smb_vfs_call_getlock(struct vfs_handle_struct *handle,
                          struct files_struct *fsp, off_t *poffset,
                          off_t *pcount, int *ptype, pid_t *ppid)
{
        VFS_FIND(getlock);
        return handle->fns->getlock_fn(handle, fsp, poffset, pcount, ptype,
                                       ppid);
}
/****************************************************************************
 Actual function that gets POSIX locks. Copes with 64 -> 32 bit cruft and
 broken NFS implementations.
****************************************************************************/

static bool posix_fcntl_getlock(files_struct *fsp, off_t *poffset, off_t *pcount, int *ptype)
{
        pid_t pid;
        bool ret;

        DEBUG(8,("posix_fcntl_getlock %d %.0f %.0f %d\n",
                fsp->fh->fd,(double)*poffset,(double)*pcount,*ptype));

        ret = SMB_VFS_GETLOCK(fsp, poffset, pcount, ptype, &pid);

        if (!ret && ((errno == EFBIG) || (errno == ENOLCK) || (errno == EINVAL))) {

                DEBUG(0,("posix_fcntl_getlock: WARNING: lock request at offset %.0f, length %.0f returned\n",
                        (double)*poffset,(double)*pcount));
                DEBUGADD(0,("an %s error. This can happen when using 64 bit lock offsets\n", strerror(errno)));
                DEBUGADD(0,("on 32 bit NFS mounted file systems.\n"));

                /*
                 * If the offset is > 0x7FFFFFFF then this will cause problems on
                 * 32 bit NFS mounted filesystems. Just ignore it.
                 */

                if (*poffset & ~((off_t)0x7fffffff)) {
                        DEBUG(0,("Offset greater than 31 bits. Returning success.\n"));
                        return True;
                }

                if (*pcount & ~((off_t)0x7fffffff)) {
                        /* 32 bit NFS file system, retry with smaller offset */
                        DEBUG(0,("Count greater than 31 bits - retrying with 31 bit truncated length.\n"));
                        errno = 0;
                        *pcount &= 0x7fffffff;
                        ret = SMB_VFS_GETLOCK(fsp,poffset,pcount,ptype,&pid);
                }
        }

        DEBUG(8,("posix_fcntl_getlock: Lock query call %s\n", ret ? "successful" : "failed"));
        return ret;
}
/****************************************************************************
 POSIX function to see if a file region is locked. Returns True if the
 region is locked, False otherwise.
****************************************************************************/

bool is_posix_locked(files_struct *fsp,
                        uint64_t *pu_offset,
                        uint64_t *pu_count,
                        enum brl_type *plock_type,
                        enum brl_flavour lock_flav)
{
        off_t offset;
        off_t count;
        int posix_lock_type = map_posix_lock_type(fsp,*plock_type);

        DEBUG(10,("is_posix_locked: File %s, offset = %.0f, count = %.0f, "
                  "type = %s\n", fsp_str_dbg(fsp), (double)*pu_offset,
                  (double)*pu_count, posix_lock_type_name(*plock_type)));

        /*
         * If the requested lock won't fit in the POSIX range, we will
         * never set it, so presume it is not locked.
         */

        if(!posix_lock_in_range(&offset, &count, *pu_offset, *pu_count)) {
                return False;
        }

        if (!posix_fcntl_getlock(fsp,&offset,&count,&posix_lock_type)) {
                return False;
        }

        if (posix_lock_type == F_UNLCK) {
                return False;
        }

        if (lock_flav == POSIX_LOCK) {
                /* Only POSIX lock queries need to know the details. */
                *pu_offset = (uint64_t)offset;
                *pu_count = (uint64_t)count;
                *plock_type = (posix_lock_type == F_RDLCK) ? READ_LOCK : WRITE_LOCK;
        }
        return True;
}
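/*
 * Note that only POSIX-flavour queries (lock_flav == POSIX_LOCK) get the
 * conflicting range and lock type written back through the pointer
 * arguments; Windows-flavour callers only need the True/False answer.
 */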
/****************************************************************************
 Next - the functions that deal with in memory database storing representations
 of either Windows CIFS locks or POSIX CIFS locks.
****************************************************************************/

/* The key used in the in-memory POSIX databases. */

struct lock_ref_count_key {
        struct file_id id;
        char r;
};

/*******************************************************************
 Form a static locking key for a dev/inode pair for the lock ref count
******************************************************************/

static TDB_DATA locking_ref_count_key_fsp(files_struct *fsp,
                                          struct lock_ref_count_key *tmp)
{
        ZERO_STRUCTP(tmp);
        tmp->id = fsp->file_id;
        tmp->r = 'r';
        return make_tdb_data((uint8_t *)tmp, sizeof(*tmp));
}
/*******************************************************************
 Convenience function to get an fd_array key from an fsp.
******************************************************************/

static TDB_DATA fd_array_key_fsp(files_struct *fsp)
{
        return make_tdb_data((uint8_t *)&fsp->file_id, sizeof(fsp->file_id));
}
/*******************************************************************
 Create the in-memory POSIX lock databases.
********************************************************************/

bool posix_locking_init(bool read_only)
{
        if (posix_pending_close_db != NULL) {
                return true;
        }

        posix_pending_close_db = db_open_rbt(NULL);

        if (posix_pending_close_db == NULL) {
                DEBUG(0,("Failed to open POSIX pending close database.\n"));
                return false;
        }

        return true;
}
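/*
 * db_open_rbt() creates a purely in-memory (red-black tree backed)
 * database, so the pending close state is private to this smbd process -
 * which is all that is needed, since POSIX fcntl locks and open fd's are
 * per-process anyway.
 */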
/*******************************************************************
 Delete the in-memory POSIX lock databases.
********************************************************************/

bool posix_locking_end(void)
{
        /*
         * Shouldn't we close all fd's here?
         */
        TALLOC_FREE(posix_pending_close_db);
        return true;
}
/****************************************************************************
 Next - the functions that deal with storing fd's that have outstanding
 POSIX locks when closed.
****************************************************************************/

/****************************************************************************
 The records in posix_pending_close_db are composed of an array of
 ints keyed by dev/ino pair. Those ints are the fd's that were open on
 this dev/ino pair that should have been closed, but can't as the lock
 ref count is non zero.
****************************************************************************/
/****************************************************************************
 Keep a reference count of the number of Windows locks open on this dev/ino
 pair. Creates entry if it doesn't exist.
****************************************************************************/

static void increment_windows_lock_ref_count(files_struct *fsp)
{
        struct lock_ref_count_key tmp;
        int32_t lock_ref_count = 0;
        NTSTATUS status;

        status = dbwrap_change_int32_atomic(
                posix_pending_close_db, locking_ref_count_key_fsp(fsp, &tmp),
                &lock_ref_count, 1);

        SMB_ASSERT(NT_STATUS_IS_OK(status));
        SMB_ASSERT(lock_ref_count < INT32_MAX);

        DEBUG(10,("increment_windows_lock_ref_count for file %s now = %d\n",
                  fsp_str_dbg(fsp), (int)lock_ref_count));
}
/****************************************************************************
 Decrement the Windows lock reference count on this dev/ino pair.
****************************************************************************/

static void decrement_windows_lock_ref_count(files_struct *fsp)
{
        struct lock_ref_count_key tmp;
        int32_t lock_ref_count = 0;
        NTSTATUS status;

        status = dbwrap_change_int32_atomic(
                posix_pending_close_db, locking_ref_count_key_fsp(fsp, &tmp),
                &lock_ref_count, -1);

        SMB_ASSERT(NT_STATUS_IS_OK(status));
        SMB_ASSERT(lock_ref_count >= 0);

        DEBUG(10,("decrement_windows_lock_ref_count for file %s now = %d\n",
                  fsp_str_dbg(fsp), (int)lock_ref_count));
}
/****************************************************************************
 Fetch the lock ref count.
****************************************************************************/

static int32_t get_windows_lock_ref_count(files_struct *fsp)
{
        struct lock_ref_count_key tmp;
        NTSTATUS status;
        int32_t lock_ref_count = 0;

        status = dbwrap_fetch_int32(
                posix_pending_close_db, locking_ref_count_key_fsp(fsp, &tmp),
                &lock_ref_count);

        if (!NT_STATUS_IS_OK(status) &&
            !NT_STATUS_EQUAL(status, NT_STATUS_NOT_FOUND)) {
                DEBUG(0, ("get_windows_lock_ref_count: Error fetching "
                          "lock ref count for file %s: %s\n",
                          fsp_str_dbg(fsp), nt_errstr(status)));
        }
        return lock_ref_count;
}
/****************************************************************************
 Delete a lock_ref_count entry.
****************************************************************************/

static void delete_windows_lock_ref_count(files_struct *fsp)
{
        struct lock_ref_count_key tmp;

        /* Not a bug if it doesn't exist - no locks were ever granted. */

        dbwrap_delete(posix_pending_close_db,
                      locking_ref_count_key_fsp(fsp, &tmp));

        DEBUG(10,("delete_windows_lock_ref_count for file %s\n",
                  fsp_str_dbg(fsp)));
}
/****************************************************************************
 Add an fd to the pending close tdb.
****************************************************************************/

static void add_fd_to_close_entry(files_struct *fsp)
{
        struct db_record *rec;
        int *fds;
        size_t num_fds;
        NTSTATUS status;
        TDB_DATA value;

        rec = dbwrap_fetch_locked(
                posix_pending_close_db, talloc_tos(),
                fd_array_key_fsp(fsp));

        SMB_ASSERT(rec != NULL);

        value = dbwrap_record_get_value(rec);
        SMB_ASSERT((value.dsize % sizeof(int)) == 0);

        num_fds = value.dsize / sizeof(int);
        fds = talloc_array(rec, int, num_fds+1);

        SMB_ASSERT(fds != NULL);

        memcpy(fds, value.dptr, value.dsize);
        fds[num_fds] = fsp->fh->fd;

        status = dbwrap_record_store(
                rec, make_tdb_data((uint8_t *)fds, talloc_get_size(fds)), 0);

        SMB_ASSERT(NT_STATUS_IS_OK(status));

        TALLOC_FREE(rec);

        DEBUG(10,("add_fd_to_close_entry: added fd %d file %s\n",
                  fsp->fh->fd, fsp_str_dbg(fsp)));
}
/****************************************************************************
 Remove all fd entries for a specific dev/inode pair from the tdb.
****************************************************************************/

static void delete_close_entries(files_struct *fsp)
{
        struct db_record *rec;

        rec = dbwrap_fetch_locked(
                posix_pending_close_db, talloc_tos(),
                fd_array_key_fsp(fsp));

        SMB_ASSERT(rec != NULL);
        dbwrap_record_delete(rec);
        TALLOC_FREE(rec);
}
/****************************************************************************
 Get the array of POSIX pending close records for an open fsp. Returns number
 of entries.
****************************************************************************/

static size_t get_posix_pending_close_entries(TALLOC_CTX *mem_ctx,
                                              files_struct *fsp, int **entries)
{
        TDB_DATA dbuf;
        NTSTATUS status;

        status = dbwrap_fetch(
                posix_pending_close_db, mem_ctx, fd_array_key_fsp(fsp),
                &dbuf);

        if (NT_STATUS_EQUAL(status, NT_STATUS_NOT_FOUND)) {
                *entries = NULL;
                return 0;
        }

        SMB_ASSERT(NT_STATUS_IS_OK(status));

        if (dbuf.dsize == 0) {
                *entries = NULL;
                return 0;
        }

        *entries = (int *)dbuf.dptr;
        return (size_t)(dbuf.dsize / sizeof(int));
}
/****************************************************************************
 Deal with pending closes needed by POSIX locking support.
 Note that posix_locking_close_file() is expected to have been called
 to delete all locks on this fsp before this function is called.
****************************************************************************/

int fd_close_posix(struct files_struct *fsp)
{
        int saved_errno = 0;
        int ret;
        int *fd_array = NULL;
        size_t count, i;

        if (!lp_locking(fsp->conn->params) ||
            !lp_posix_locking(fsp->conn->params))
        {
                /*
                 * No locking or POSIX to worry about or we want POSIX semantics
                 * which will lose all locks on all fd's open on this dev/inode,
                 * just close.
                 */
                return close(fsp->fh->fd);
        }

        if (get_windows_lock_ref_count(fsp)) {

                /*
                 * There are outstanding locks on this dev/inode pair on
                 * other fds. Add our fd to the pending close tdb and set
                 * fsp->fh->fd to -1.
                 */

                add_fd_to_close_entry(fsp);
                return 0;
        }

        /*
         * No outstanding locks. Get the pending close fd's
         * from the tdb and close them all.
         */

        count = get_posix_pending_close_entries(talloc_tos(), fsp, &fd_array);

        if (count) {
                DEBUG(10,("fd_close_posix: doing close on %u fd's.\n",
                          (unsigned int)count));

                for(i = 0; i < count; i++) {
                        if (close(fd_array[i]) == -1) {
                                saved_errno = errno;
                        }
                }

                /*
                 * Delete all fd's stored in the tdb
                 * for this dev/inode pair.
                 */

                delete_close_entries(fsp);
        }

        TALLOC_FREE(fd_array);

        /* Don't need a lock ref count on this dev/ino anymore. */
        delete_windows_lock_ref_count(fsp);

        /*
         * Finally close the fd associated with this fsp.
         */

        ret = close(fsp->fh->fd);

        if (ret == 0 && saved_errno != 0) {
                errno = saved_errno;
                ret = -1;
        }

        return ret;
}
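/*
 * To summarise the close logic above: with POSIX locking enabled, an fd on
 * a dev/inode pair that still carries Windows locks elsewhere is parked in
 * the pending close db instead of being closed, because closing any fd on
 * a file drops every POSIX lock this process holds on it. Once the last
 * Windows lock goes away, the next close flushes all parked fd's as well.
 */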
/****************************************************************************
 Next - the functions that deal with mapping CIFS Windows locks onto
 the underlying system POSIX locks.
****************************************************************************/

/*
 * Structure used when splitting a lock range
 * into a POSIX lock range. Doubly linked list.
 */

struct lock_list {
        struct lock_list *next;
        struct lock_list *prev;
        off_t start;
        off_t size;
};

/****************************************************************************
 Create a list of lock ranges that don't overlap a given range. Used in calculating
 POSIX locks and unlocks. This is a difficult function that requires ASCII art to
 understand it :-).
****************************************************************************/
static struct lock_list *posix_lock_list(TALLOC_CTX *ctx,
                                struct lock_list *lhead,
                                const struct lock_context *lock_ctx, /* Lock context lhead belongs to. */
                                files_struct *fsp,
                                const struct lock_struct *plocks,
                                int num_locks)
{
        int i;

        /*
         * Check the current lock list on this dev/inode pair.
         * Quit if the list is deleted.
         */

        DEBUG(10,("posix_lock_list: curr: start=%.0f,size=%.0f\n",
                (double)lhead->start, (double)lhead->size ));

        for (i=0; i<num_locks && lhead; i++) {
                const struct lock_struct *lock = &plocks[i];
                struct lock_list *l_curr;

                /* Ignore all but read/write locks. */
                if (lock->lock_type != READ_LOCK && lock->lock_type != WRITE_LOCK) {
                        continue;
                }

                /* Ignore locks not owned by this process. */
                if (!serverid_equal(&lock->context.pid, &lock_ctx->pid)) {
                        continue;
                }

                /*
                 * Walk the lock list, checking for overlaps. Note that
                 * the lock list can expand within this loop if the current
                 * range being examined needs to be split.
                 */

                for (l_curr = lhead; l_curr;) {
                        DEBUG(10,("posix_lock_list: lock: fnum=%llu: start=%.0f,size=%.0f:type=%s",
                                (unsigned long long)lock->fnum,
                                (double)lock->start, (double)lock->size, posix_lock_type_name(lock->lock_type) ));

                        if ( (l_curr->start >= (lock->start + lock->size)) ||
                             (lock->start >= (l_curr->start + l_curr->size))) {

                                /* No overlap with existing lock - leave this range alone. */
/*********************************************
                                             +---------+
                                             | l_curr  |
                                             +---------+
                                +-------+
                                | lock  |
                                +-------+
OR....
             +---------+
             | l_curr  |
             +---------+
**********************************************/

                                DEBUG(10,(" no overlap case.\n" ));

                                l_curr = l_curr->next;

                        } else if ( (l_curr->start >= lock->start) &&
                                    (l_curr->start + l_curr->size <= lock->start + lock->size) ) {

                                /*
                                 * This range is completely overlapped by this existing lock range
                                 * and thus should have no effect. Delete it from the list.
                                 */
/*********************************************
                +---------+
                | l_curr  |
                +---------+
        +---------------------------+
        |       lock                |
        +---------------------------+
**********************************************/
                                /* Save the next pointer */
                                struct lock_list *ul_next = l_curr->next;

                                DEBUG(10,(" delete case.\n" ));

                                DLIST_REMOVE(lhead, l_curr);
                                if(lhead == NULL) {
                                        break; /* No more list... */
                                }

                                l_curr = ul_next;

                        } else if ( (l_curr->start >= lock->start) &&
                                    (l_curr->start < lock->start + lock->size) &&
                                    (l_curr->start + l_curr->size > lock->start + lock->size) ) {

                                /*
                                 * This range overlaps the existing lock range at the high end.
                                 * Truncate by moving start to existing range end and reducing size.
                                 */
/*********************************************
                +---------------+
                | l_curr        |
                +---------------+
        +---------------+
        |    lock       |
        +---------------+
BECOMES....
                        +-------+
                        | l_curr|
                        +-------+
**********************************************/

                                l_curr->size = (l_curr->start + l_curr->size) - (lock->start + lock->size);
                                l_curr->start = lock->start + lock->size;

                                DEBUG(10,(" truncate high case: start=%.0f,size=%.0f\n",
                                        (double)l_curr->start, (double)l_curr->size ));

                                l_curr = l_curr->next;
                        } else if ( (l_curr->start < lock->start) &&
                                    (l_curr->start + l_curr->size > lock->start) &&
                                    (l_curr->start + l_curr->size <= lock->start + lock->size) ) {

                                /*
                                 * This range overlaps the existing lock range at the low end.
                                 * Truncate by reducing size.
                                 */
/*********************************************
   +---------------+
   | l_curr        |
   +---------------+
           +---------------+
           |    lock       |
           +---------------+
BECOMES....
   +-------+
   | l_curr|
   +-------+
**********************************************/

                                l_curr->size = lock->start - l_curr->start;

                                DEBUG(10,(" truncate low case: start=%.0f,size=%.0f\n",
                                        (double)l_curr->start, (double)l_curr->size ));

                                l_curr = l_curr->next;

                        } else if ( (l_curr->start < lock->start) &&
                                    (l_curr->start + l_curr->size > lock->start + lock->size) ) {
                                /*
                                 * Worst case scenario. Range completely overlaps an existing
                                 * lock range. Split the request into two, push the new (upper) request
                                 * into the dlink list, and continue with the entry after l_new (as we
                                 * know that l_new will not overlap with this lock).
                                 */
/*********************************************
        +---------------------------+
        |        l_curr             |
        +---------------------------+
                +---------+
                |  lock   |
                +---------+
BECOMES.....
        +-------+         +---------+
        | l_curr|         |  l_new  |
        +-------+         +---------+
**********************************************/
                                struct lock_list *l_new = talloc(ctx, struct lock_list);

                                if(l_new == NULL) {
                                        DEBUG(0,("posix_lock_list: talloc fail.\n"));
                                        return NULL; /* The talloc_destroy takes care of cleanup. */
                                }

                                ZERO_STRUCTP(l_new);
                                l_new->start = lock->start + lock->size;
                                l_new->size = l_curr->start + l_curr->size - l_new->start;

                                /* Truncate the l_curr. */
                                l_curr->size = lock->start - l_curr->start;

                                DEBUG(10,(" split case: curr: start=%.0f,size=%.0f \
new: start=%.0f,size=%.0f\n", (double)l_curr->start, (double)l_curr->size,
                                        (double)l_new->start, (double)l_new->size ));

                                /*
                                 * Add into the dlink list after the l_curr point - NOT at lhead.
                                 */

                                DLIST_ADD_AFTER(lhead, l_new, l_curr);

                                /* And move after the link we added. */
                                l_curr = l_new->next;
                        } else {

                                /*
                                 * This logic case should never happen. Ensure this is the
                                 * case by forcing an abort.... Remove in production.
                                 */
                                char *msg = NULL;

                                if (asprintf(&msg, "logic flaw in cases: l_curr: start = %.0f, size = %.0f : \
lock: start = %.0f, size = %.0f", (double)l_curr->start, (double)l_curr->size, (double)lock->start, (double)lock->size ) != -1) {
                                        smb_panic(msg);
                                } else {
                                        smb_panic("posix_lock_list");
                                }
                        }
                } /* end for ( l_curr = lhead; l_curr;) */
        } /* end for (i=0; i<num_locks && lhead; i++) */

        return lhead;
}
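/*
 * Worked example: lhead initially holds the single range start=0,size=30
 * and this process already has a lock at start=10,size=10 on another fd.
 * The "split" case above turns the list into two ranges, 0-10 and 20-30,
 * so the caller can lock (or unlock) around the existing lock without
 * touching it.
 */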
/****************************************************************************
 POSIX function to acquire a lock. Returns True if the
 lock could be granted, False if not.
****************************************************************************/

bool set_posix_lock_windows_flavour(files_struct *fsp,
                        uint64_t u_offset,
                        uint64_t u_count,
                        enum brl_type lock_type,
                        const struct lock_context *lock_ctx,
                        const struct lock_struct *plocks,
                        int num_locks,
                        int *errno_ret)
{
        off_t offset;
        off_t count;
        int posix_lock_type = map_posix_lock_type(fsp,lock_type);
        bool ret = True;
        size_t lock_count;
        TALLOC_CTX *l_ctx = NULL;
        struct lock_list *llist = NULL;
        struct lock_list *ll = NULL;

        DEBUG(5,("set_posix_lock_windows_flavour: File %s, offset = %.0f, "
                 "count = %.0f, type = %s\n", fsp_str_dbg(fsp),
                 (double)u_offset, (double)u_count,
                 posix_lock_type_name(lock_type)));

        /*
         * If the requested lock won't fit in the POSIX range, we will
         * pretend it was successful.
         */

        if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
                increment_windows_lock_ref_count(fsp);
                return True;
        }

        /*
         * Windows is very strange. It allows read locks to be overlaid
         * (even over a write lock), but leaves the write lock in force until the first
         * unlock. It also reference counts the locks. This means the following sequence :
         *
         * process1                                      process2
         * ------------------------------------------------------------------------
         * WRITE LOCK : start = 2, len = 10
         *                                            READ LOCK: start = 0, len = 10 - FAIL
         * READ LOCK : start = 0, len = 14
         *                                            READ LOCK: start = 0, len = 10 - FAIL
         * UNLOCK : start = 2, len = 10
         *                                            READ LOCK: start = 0, len = 10 - OK
         *
         * Under POSIX, the same sequence in steps 1 and 2 would not be reference counted, but
         * would leave a single read lock over the 0-14 region.
         */

        if ((l_ctx = talloc_init("set_posix_lock")) == NULL) {
                DEBUG(0,("set_posix_lock_windows_flavour: unable to init talloc context.\n"));
                return False;
        }

        if ((ll = talloc(l_ctx, struct lock_list)) == NULL) {
                DEBUG(0,("set_posix_lock_windows_flavour: unable to talloc unlock list.\n"));
                talloc_destroy(l_ctx);
                return False;
        }

        /*
         * Create the initial list entry containing the
         * lock we want to add.
         */

        ZERO_STRUCTP(ll);
        ll->start = offset;
        ll->size = count;

        DLIST_ADD(llist, ll);

        /*
         * The following call calculates if there are any
         * overlapping locks held by this process on
         * fd's open on the same file and splits this list
         * into a list of lock ranges that do not overlap with existing
         * POSIX locks.
         */

        llist = posix_lock_list(l_ctx,
                                llist,
                                lock_ctx, /* Lock context llist belongs to. */
                                fsp,
                                plocks,
                                num_locks);

        /*
         * Add the POSIX locks on the list of ranges returned.
         * As the lock is supposed to be added atomically, we need to
         * back out all the locks if any one of these calls fail.
         */

        for (lock_count = 0, ll = llist; ll; ll = ll->next, lock_count++) {
                offset = ll->start;
                count = ll->size;

                DEBUG(5,("set_posix_lock_windows_flavour: Real lock: Type = %s: offset = %.0f, count = %.0f\n",
                        posix_lock_type_name(posix_lock_type), (double)offset, (double)count ));

                if (!posix_fcntl_lock(fsp,F_SETLK,offset,count,posix_lock_type)) {
                        *errno_ret = errno;
                        DEBUG(5,("set_posix_lock_windows_flavour: Lock fail !: Type = %s: offset = %.0f, count = %.0f. Errno = %s\n",
                                posix_lock_type_name(posix_lock_type), (double)offset, (double)count, strerror(errno) ));
                        ret = False;
                        break;
                }
        }

        if (!ret) {

                /*
                 * Back out all the POSIX locks we have on fail.
                 */

                for (ll = llist; lock_count; ll = ll->next, lock_count--) {
                        offset = ll->start;
                        count = ll->size;

                        DEBUG(5,("set_posix_lock_windows_flavour: Backing out locks: Type = %s: offset = %.0f, count = %.0f\n",
                                posix_lock_type_name(posix_lock_type), (double)offset, (double)count ));

                        posix_fcntl_lock(fsp,F_SETLK,offset,count,F_UNLCK);
                }
        } else {
                /* Remember the number of Windows locks we have on this dev/ino pair. */
                increment_windows_lock_ref_count(fsp);
        }

        talloc_destroy(l_ctx);
        return ret;
}
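/*
 * Note the all-or-nothing behaviour above: if any range in the split list
 * cannot be locked, every range locked so far is backed out with F_UNLCK,
 * so the Windows-level lock is either reflected completely in POSIX locks
 * or not at all.
 */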
/****************************************************************************
 POSIX function to release a lock. Returns True if the
 lock could be released, False if not.
****************************************************************************/

bool release_posix_lock_windows_flavour(files_struct *fsp,
                                uint64_t u_offset,
                                uint64_t u_count,
                                enum brl_type deleted_lock_type,
                                const struct lock_context *lock_ctx,
                                const struct lock_struct *plocks,
                                int num_locks)
{
        off_t offset;
        off_t count;
        bool ret = True;
        TALLOC_CTX *ul_ctx = NULL;
        struct lock_list *ulist = NULL;
        struct lock_list *ul = NULL;

        DEBUG(5,("release_posix_lock_windows_flavour: File %s, offset = %.0f, "
                 "count = %.0f\n", fsp_str_dbg(fsp),
                 (double)u_offset, (double)u_count));

        /* Remember the number of Windows locks we have on this dev/ino pair. */
        decrement_windows_lock_ref_count(fsp);

        /*
         * If the requested lock won't fit in the POSIX range, we will
         * pretend it was successful.
         */

        if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
                return True;
        }

        if ((ul_ctx = talloc_init("release_posix_lock")) == NULL) {
                DEBUG(0,("release_posix_lock_windows_flavour: unable to init talloc context.\n"));
                return False;
        }

        if ((ul = talloc(ul_ctx, struct lock_list)) == NULL) {
                DEBUG(0,("release_posix_lock_windows_flavour: unable to talloc unlock list.\n"));
                talloc_destroy(ul_ctx);
                return False;
        }

        /*
         * Create the initial list entry containing the
         * lock we want to remove.
         */

        ZERO_STRUCTP(ul);
        ul->start = offset;
        ul->size = count;

        DLIST_ADD(ulist, ul);

        /*
         * The following call calculates if there are any
         * overlapping locks held by this process on
         * fd's open on the same file and creates a
         * list of unlock ranges that will allow
         * POSIX lock ranges to remain on the file whilst the
         * unlocks are performed.
         */

        ulist = posix_lock_list(ul_ctx,
                                ulist,
                                lock_ctx, /* Lock context ulist belongs to. */
                                fsp,
                                plocks,
                                num_locks);

        /*
         * If there were any overlapped entries (list is > 1 or size or start have changed),
         * and the lock_type we just deleted from
         * the upper layer tdb was a write lock, then before doing the unlock we need to downgrade
         * the POSIX lock to a read lock. This allows any overlapping read locks
         * to be atomically maintained.
         */

        if (deleted_lock_type == WRITE_LOCK &&
            (!ulist || ulist->next != NULL || ulist->start != offset || ulist->size != count)) {

                DEBUG(5,("release_posix_lock_windows_flavour: downgrading lock to READ: offset = %.0f, count = %.0f\n",
                        (double)offset, (double)count ));

                if (!posix_fcntl_lock(fsp,F_SETLK,offset,count,F_RDLCK)) {
                        DEBUG(0,("release_posix_lock_windows_flavour: downgrade of lock failed with error %s !\n", strerror(errno) ));
                        talloc_destroy(ul_ctx);
                        return False;
                }
        }

        /*
         * Release the POSIX locks on the list of ranges returned.
         */

        for(; ulist; ulist = ulist->next) {
                offset = ulist->start;
                count = ulist->size;

                DEBUG(5,("release_posix_lock_windows_flavour: Real unlock: offset = %.0f, count = %.0f\n",
                        (double)offset, (double)count ));

                if (!posix_fcntl_lock(fsp,F_SETLK,offset,count,F_UNLCK)) {
                        ret = False;
                }
        }

        talloc_destroy(ul_ctx);
        return ret;
}
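/*
 * The downgrade-to-F_RDLCK step above covers the case where the Windows
 * write lock being released overlaps read locks still held by this
 * process: POSIX locks don't stack, so those regions currently sit under
 * a single F_WRLCK. Downgrading the whole range to a read lock first, and
 * then unlocking only the ranges posix_lock_list() returned, leaves the
 * overlapping regions read-locked as they should be.
 */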
/****************************************************************************
 Next - the functions that deal with mapping CIFS POSIX locks onto
 the underlying system POSIX locks.
****************************************************************************/

/****************************************************************************
 POSIX function to acquire a lock. Returns True if the
 lock could be granted, False if not.
 As POSIX locks don't stack or conflict (they just overwrite)
 we can map the requested lock directly onto a system one. We
 know it doesn't conflict with locks on other contexts as the
 upper layer would have refused it.
****************************************************************************/

bool set_posix_lock_posix_flavour(files_struct *fsp,
                        uint64_t u_offset,
                        uint64_t u_count,
                        enum brl_type lock_type,
                        int *errno_ret)
{
        off_t offset;
        off_t count;
        int posix_lock_type = map_posix_lock_type(fsp,lock_type);

        DEBUG(5,("set_posix_lock_posix_flavour: File %s, offset = %.0f, count "
                 "= %.0f, type = %s\n", fsp_str_dbg(fsp),
                 (double)u_offset, (double)u_count,
                 posix_lock_type_name(lock_type)));

        /*
         * If the requested lock won't fit in the POSIX range, we will
         * pretend it was successful.
         */

        if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
                return True;
        }

        if (!posix_fcntl_lock(fsp,F_SETLK,offset,count,posix_lock_type)) {
                *errno_ret = errno;
                DEBUG(5,("set_posix_lock_posix_flavour: Lock fail !: Type = %s: offset = %.0f, count = %.0f. Errno = %s\n",
                        posix_lock_type_name(posix_lock_type), (double)offset, (double)count, strerror(errno) ));
                return False;
        }
        return True;
}
/****************************************************************************
 POSIX function to release a lock. Returns True if the
 lock could be released, False if not.
 We are given a complete lock state from the upper layer which is what the lock
 state should be after the unlock has already been done, so what
 we do is punch out holes in the unlock range where locks owned by this process
 have a different lock context.
****************************************************************************/

bool release_posix_lock_posix_flavour(files_struct *fsp,
                                uint64_t u_offset,
                                uint64_t u_count,
                                const struct lock_context *lock_ctx,
                                const struct lock_struct *plocks,
                                int num_locks)
{
        bool ret = True;
        off_t offset;
        off_t count;
        TALLOC_CTX *ul_ctx = NULL;
        struct lock_list *ulist = NULL;
        struct lock_list *ul = NULL;

        DEBUG(5,("release_posix_lock_posix_flavour: File %s, offset = %.0f, "
                 "count = %.0f\n", fsp_str_dbg(fsp),
                 (double)u_offset, (double)u_count));

        /*
         * If the requested lock won't fit in the POSIX range, we will
         * pretend it was successful.
         */

        if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
                return True;
        }

        if ((ul_ctx = talloc_init("release_posix_lock")) == NULL) {
                DEBUG(0,("release_posix_lock_posix_flavour: unable to init talloc context.\n"));
                return False;
        }

        if ((ul = talloc(ul_ctx, struct lock_list)) == NULL) {
                DEBUG(0,("release_posix_lock_posix_flavour: unable to talloc unlock list.\n"));
                talloc_destroy(ul_ctx);
                return False;
        }

        /*
         * Create the initial list entry containing the
         * lock we want to remove.
         */

        ZERO_STRUCTP(ul);
        ul->start = offset;
        ul->size = count;

        DLIST_ADD(ulist, ul);

        /*
         * Walk the given array creating a linked list
         * of unlock requests.
         */

        ulist = posix_lock_list(ul_ctx,
                                ulist,
                                lock_ctx, /* Lock context ulist belongs to. */
                                fsp,
                                plocks,
                                num_locks);

        /*
         * Release the POSIX locks on the list of ranges returned.
         */

        for(; ulist; ulist = ulist->next) {
                offset = ulist->start;
                count = ulist->size;

                DEBUG(5,("release_posix_lock_posix_flavour: Real unlock: offset = %.0f, count = %.0f\n",
                        (double)offset, (double)count ));

                if (!posix_fcntl_lock(fsp,F_SETLK,offset,count,F_UNLCK)) {
                        ret = False;
                }
        }

        talloc_destroy(ul_ctx);
        return ret;
}