[Samba.git] / source3 / locking / posix.c
/*
   Unix SMB/CIFS implementation.
   Locking functions
   Copyright (C) Jeremy Allison 1992-2006

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.

   Revision History:

   POSIX locking support. Jeremy Allison (jeremy@valinux.com), Apr. 2000.
*/
#include "includes.h"
#include "system/filesys.h"
#include "locking/proto.h"
#include "dbwrap/dbwrap.h"
#include "dbwrap/dbwrap_rbt.h"
#include "util_tdb.h"

#undef DBGC_CLASS
#define DBGC_CLASS DBGC_LOCKING

/*
 * The pending close database handle.
 */

static struct db_context *posix_pending_close_db;

/****************************************************************************
 First - the functions that deal with the underlying system locks - these
 functions are used no matter if we're mapping CIFS Windows locks or CIFS
 POSIX locks onto POSIX.
****************************************************************************/

/****************************************************************************
 Utility function to map a lock type correctly depending on the open
 mode of a file.
****************************************************************************/

static int map_posix_lock_type( files_struct *fsp, enum brl_type lock_type)
{
	if((lock_type == WRITE_LOCK) && !fsp->can_write) {
		/*
		 * Many UNIX's cannot get a write lock on a file opened read-only.
		 * Win32 locking semantics allow this.
		 * Do the best we can and attempt a read-only lock.
		 */
		DEBUG(10,("map_posix_lock_type: Downgrading write lock to read due to read-only file.\n"));
		return F_RDLCK;
	}

	/*
	 * This return should be the most normal, as we attempt
	 * to always open files read/write.
	 */

	return (lock_type == READ_LOCK) ? F_RDLCK : F_WRLCK;
}
/****************************************************************************
 Debugging aid :-).
****************************************************************************/

static const char *posix_lock_type_name(int lock_type)
{
	return (lock_type == F_RDLCK) ? "READ" : "WRITE";
}

/****************************************************************************
 Check to see if the given unsigned lock range is within the possible POSIX
 range. Modifies the given args to be in range if possible, just returns
 False if not.
****************************************************************************/

static bool posix_lock_in_range(off_t *offset_out, off_t *count_out,
				uint64_t u_offset, uint64_t u_count)
{
	off_t offset = (off_t)u_offset;
	off_t count = (off_t)u_count;

	/*
	 * For the type of system we are, attempt to
	 * find the maximum positive lock offset as an off_t.
	 */

#if defined(MAX_POSITIVE_LOCK_OFFSET) /* Some systems have arbitrary limits. */

	off_t max_positive_lock_offset = (MAX_POSITIVE_LOCK_OFFSET);

#else
	/*
	 * In this case off_t is 64 bits,
	 * and the underlying system can handle 64 bit signed locks.
	 */

	off_t mask2 = ((off_t)0x4) << (SMB_OFF_T_BITS-4);
	off_t mask = (mask2<<1);
	off_t max_positive_lock_offset = ~mask;

#endif
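	/*
	 * Worked example (assuming the #else branch above, i.e. no
	 * MAX_POSITIVE_LOCK_OFFSET override and SMB_OFF_T_BITS == 64):
	 *
	 *	mask2 = 0x4 << 60  = 0x4000000000000000
	 *	mask  = mask2 << 1 = 0x8000000000000000  (just the sign bit)
	 *	max_positive_lock_offset = ~mask = 0x7FFFFFFFFFFFFFFF
	 *
	 * i.e. the largest positive value a signed 64-bit off_t can hold.
	 */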
	/*
	 * POSIX locks of length zero mean lock to end-of-file.
	 * Win32 locks of length zero are point probes. Ignore
	 * any Win32 locks of length zero. JRA.
	 */

	if (count == (off_t)0) {
		DEBUG(10,("posix_lock_in_range: count = 0, ignoring.\n"));
		return False;
	}

	/*
	 * If the given offset was > max_positive_lock_offset then we cannot
	 * map this at all - just ignore this lock.
	 */

	if (u_offset & ~((uint64_t)max_positive_lock_offset)) {
		DEBUG(10,("posix_lock_in_range: (offset = %.0f) offset > %.0f and we cannot handle this. Ignoring lock.\n",
			(double)u_offset, (double)((uint64_t)max_positive_lock_offset) ));
		return False;
	}

	/*
	 * We must truncate the count to less than max_positive_lock_offset.
	 */

	if (u_count & ~((uint64_t)max_positive_lock_offset)) {
		count = max_positive_lock_offset;
	}

	/*
	 * Truncate count to end at max lock offset.
	 */

	if (offset + count < 0 || offset + count > max_positive_lock_offset) {
		count = max_positive_lock_offset - offset;
	}

	/*
	 * If we ate all the count, ignore this lock.
	 */

	if (count == 0) {
		DEBUG(10,("posix_lock_in_range: Count = 0. Ignoring lock u_offset = %.0f, u_count = %.0f\n",
			(double)u_offset, (double)u_count ));
		return False;
	}

	/*
	 * The mapping was successful.
	 */

	DEBUG(10,("posix_lock_in_range: offset_out = %.0f, count_out = %.0f\n",
		(double)offset, (double)count ));

	*offset_out = offset;
	*count_out = count;

	return True;
}
bool smb_vfs_call_lock(struct vfs_handle_struct *handle,
		       struct files_struct *fsp, int op, off_t offset,
		       off_t count, int type)
{
	VFS_FIND(lock);
	return handle->fns->lock_fn(handle, fsp, op, offset, count, type);
}

/****************************************************************************
 Actual function that does POSIX locks. Copes with 64 -> 32 bit cruft and
 broken NFS implementations.
****************************************************************************/

static bool posix_fcntl_lock(files_struct *fsp, int op, off_t offset, off_t count, int type)
{
	bool ret;

	DEBUG(8,("posix_fcntl_lock %d %d %jd %jd %d\n",
		 fsp->fh->fd,op,(intmax_t)offset,(intmax_t)count,type));

	ret = SMB_VFS_LOCK(fsp, op, offset, count, type);

	if (!ret && ((errno == EFBIG) || (errno == ENOLCK) || (errno == EINVAL))) {

		DEBUG(0,("posix_fcntl_lock: WARNING: lock request at offset %.0f, length %.0f returned\n",
			(double)offset,(double)count));
		DEBUGADD(0,("an %s error. This can happen when using 64 bit lock offsets\n", strerror(errno)));
		DEBUGADD(0,("on 32 bit NFS mounted file systems.\n"));

		/*
		 * If the offset is > 0x7FFFFFFF then this will cause problems on
		 * 32 bit NFS mounted filesystems. Just ignore it.
		 */

		if (offset & ~((off_t)0x7fffffff)) {
			DEBUG(0,("Offset greater than 31 bits. Returning success.\n"));
			return True;
		}

		if (count & ~((off_t)0x7fffffff)) {
			/* 32 bit NFS file system, retry with smaller offset */
			DEBUG(0,("Count greater than 31 bits - retrying with 31 bit truncated length.\n"));
			errno = 0;
			count &= 0x7fffffff;
			ret = SMB_VFS_LOCK(fsp, op, offset, count, type);
		}
	}

	DEBUG(8,("posix_fcntl_lock: Lock call %s\n", ret ? "successful" : "failed"));
	return ret;
}
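/*
 * For reference: with the default VFS backend, SMB_VFS_LOCK() normally boils
 * down to a plain fcntl() byte-range lock. A minimal sketch of that mapping
 * (an assumption about the common case, not a copy of the vfs_default code)
 * looks like:
 *
 *	struct flock fl;
 *	ZERO_STRUCT(fl);
 *	fl.l_type = type;		// F_RDLCK, F_WRLCK or F_UNLCK
 *	fl.l_whence = SEEK_SET;
 *	fl.l_start = offset;
 *	fl.l_len = count;		// 0 means "lock to end of file"
 *	ret = (fcntl(fsp->fh->fd, op, &fl) == 0);	// op: F_SETLK/F_SETLKW
 */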
bool smb_vfs_call_getlock(struct vfs_handle_struct *handle,
			  struct files_struct *fsp, off_t *poffset,
			  off_t *pcount, int *ptype, pid_t *ppid)
{
	VFS_FIND(getlock);
	return handle->fns->getlock_fn(handle, fsp, poffset, pcount, ptype,
				       ppid);
}

/****************************************************************************
 Actual function that gets POSIX locks. Copes with 64 -> 32 bit cruft and
 broken NFS implementations.
****************************************************************************/

static bool posix_fcntl_getlock(files_struct *fsp, off_t *poffset, off_t *pcount, int *ptype)
{
	pid_t pid;
	bool ret;

	DEBUG(8,("posix_fcntl_getlock %d %.0f %.0f %d\n",
		fsp->fh->fd,(double)*poffset,(double)*pcount,*ptype));

	ret = SMB_VFS_GETLOCK(fsp, poffset, pcount, ptype, &pid);

	if (!ret && ((errno == EFBIG) || (errno == ENOLCK) || (errno == EINVAL))) {

		DEBUG(0,("posix_fcntl_getlock: WARNING: lock request at offset %.0f, length %.0f returned\n",
			(double)*poffset,(double)*pcount));
		DEBUGADD(0,("an %s error. This can happen when using 64 bit lock offsets\n", strerror(errno)));
		DEBUGADD(0,("on 32 bit NFS mounted file systems.\n"));

		/*
		 * If the offset is > 0x7FFFFFFF then this will cause problems on
		 * 32 bit NFS mounted filesystems. Just ignore it.
		 */

		if (*poffset & ~((off_t)0x7fffffff)) {
			DEBUG(0,("Offset greater than 31 bits. Returning success.\n"));
			return True;
		}

		if (*pcount & ~((off_t)0x7fffffff)) {
			/* 32 bit NFS file system, retry with smaller offset */
			DEBUG(0,("Count greater than 31 bits - retrying with 31 bit truncated length.\n"));
			errno = 0;
			*pcount &= 0x7fffffff;
			ret = SMB_VFS_GETLOCK(fsp,poffset,pcount,ptype,&pid);
		}
	}

	DEBUG(8,("posix_fcntl_getlock: Lock query call %s\n", ret ? "successful" : "failed"));
	return ret;
}
/****************************************************************************
 POSIX function to see if a file region is locked. Returns True if the
 region is locked, False otherwise.
****************************************************************************/

bool is_posix_locked(files_struct *fsp,
			uint64_t *pu_offset,
			uint64_t *pu_count,
			enum brl_type *plock_type,
			enum brl_flavour lock_flav)
{
	off_t offset;
	off_t count;
	int posix_lock_type = map_posix_lock_type(fsp,*plock_type);

	DEBUG(10,("is_posix_locked: File %s, offset = %.0f, count = %.0f, "
		  "type = %s\n", fsp_str_dbg(fsp), (double)*pu_offset,
		  (double)*pu_count, posix_lock_type_name(*plock_type)));

	/*
	 * If the requested lock won't fit in the POSIX range, we will
	 * never set it, so presume it is not locked.
	 */

	if(!posix_lock_in_range(&offset, &count, *pu_offset, *pu_count)) {
		return False;
	}

	if (!posix_fcntl_getlock(fsp,&offset,&count,&posix_lock_type)) {
		return False;
	}

	if (posix_lock_type == F_UNLCK) {
		return False;
	}

	if (lock_flav == POSIX_LOCK) {
		/* Only POSIX lock queries need to know the details. */
		*pu_offset = (uint64_t)offset;
		*pu_count = (uint64_t)count;
		*plock_type = (posix_lock_type == F_RDLCK) ? READ_LOCK : WRITE_LOCK;
	}
	return True;
}
/****************************************************************************
 Next - the functions that deal with in memory database storing representations
 of either Windows CIFS locks or POSIX CIFS locks.
****************************************************************************/

/* The key used in the in-memory POSIX databases. */

struct lock_ref_count_key {
	struct file_id id;
	char r;
};

/*******************************************************************
 Form a static locking key for a dev/inode pair for the lock ref count
******************************************************************/

static TDB_DATA locking_ref_count_key_fsp(files_struct *fsp,
					  struct lock_ref_count_key *tmp)
{
	ZERO_STRUCTP(tmp);
	tmp->id = fsp->file_id;
	tmp->r = 'r';
	return make_tdb_data((uint8_t *)tmp, sizeof(*tmp));
}
/*******************************************************************
 Convenience function to get an fd_array key from an fsp.
******************************************************************/

static TDB_DATA fd_array_key_fsp(files_struct *fsp)
{
	return make_tdb_data((uint8_t *)&fsp->file_id, sizeof(fsp->file_id));
}
/*******************************************************************
 Create the in-memory POSIX lock databases.
********************************************************************/

bool posix_locking_init(bool read_only)
{
	if (posix_pending_close_db != NULL) {
		return true;
	}

	posix_pending_close_db = db_open_rbt(NULL);

	if (posix_pending_close_db == NULL) {
		DEBUG(0,("Failed to open POSIX pending close database.\n"));
		return false;
	}

	return true;
}

/*******************************************************************
 Delete the in-memory POSIX lock databases.
********************************************************************/

bool posix_locking_end(void)
{
	/*
	 * Shouldn't we close all fd's here?
	 */
	TALLOC_FREE(posix_pending_close_db);
	return true;
}
/****************************************************************************
 Next - the functions that deal with storing fd's that have outstanding
 POSIX locks when closed.
****************************************************************************/

/****************************************************************************
 The records in posix_pending_close_db are composed of an array of
 ints keyed by dev/ino pair. Those ints are the fd's that were open on
 this dev/ino pair that should have been closed, but can't as the lock
 ref count is non zero.
****************************************************************************/
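/*
 * For illustration only (the fd numbers are made up): if fds 31 and 45 were
 * both closed while Windows locks remained outstanding on the same dev/ino
 * pair, the record keyed by that file_id would hold the value { 31, 45 },
 * i.e. dsize == 2 * sizeof(int). Both fds are finally closed once the lock
 * ref count drops to zero (see fd_close_posix() below).
 */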
/****************************************************************************
 Keep a reference count of the number of Windows locks open on this dev/ino
 pair. Creates entry if it doesn't exist.
****************************************************************************/

static void increment_windows_lock_ref_count(files_struct *fsp)
{
	struct lock_ref_count_key tmp;
	int32_t lock_ref_count = 0;
	NTSTATUS status;

	status = dbwrap_change_int32_atomic(
		posix_pending_close_db, locking_ref_count_key_fsp(fsp, &tmp),
		&lock_ref_count, 1);

	SMB_ASSERT(NT_STATUS_IS_OK(status));
	SMB_ASSERT(lock_ref_count < INT32_MAX);

	DEBUG(10,("increment_windows_lock_ref_count for file now %s = %d\n",
		  fsp_str_dbg(fsp), (int)lock_ref_count));
}
/****************************************************************************
 Decrement the Windows lock ref count for this dev/ino pair by one.
****************************************************************************/

static void decrement_windows_lock_ref_count(files_struct *fsp)
{
	struct lock_ref_count_key tmp;
	int32_t lock_ref_count = 0;
	NTSTATUS status;

	status = dbwrap_change_int32_atomic(
		posix_pending_close_db, locking_ref_count_key_fsp(fsp, &tmp),
		&lock_ref_count, -1);

	SMB_ASSERT(NT_STATUS_IS_OK(status));
	SMB_ASSERT(lock_ref_count >= 0);

	DEBUG(10,("decrement_windows_lock_ref_count for file now %s = %d\n",
		  fsp_str_dbg(fsp), (int)lock_ref_count));
}
/****************************************************************************
 Fetch the lock ref count.
****************************************************************************/

static int32_t get_windows_lock_ref_count(files_struct *fsp)
{
	struct lock_ref_count_key tmp;
	NTSTATUS status;
	int32_t lock_ref_count = 0;

	status = dbwrap_fetch_int32(
		posix_pending_close_db, locking_ref_count_key_fsp(fsp, &tmp),
		&lock_ref_count);

	if (!NT_STATUS_IS_OK(status) &&
	    !NT_STATUS_EQUAL(status, NT_STATUS_NOT_FOUND)) {
		DEBUG(0, ("get_windows_lock_ref_count: Error fetching "
			  "lock ref count for file %s: %s\n",
			  fsp_str_dbg(fsp), nt_errstr(status)));
	}
	return lock_ref_count;
}

/****************************************************************************
 Delete a lock_ref_count entry.
****************************************************************************/

static void delete_windows_lock_ref_count(files_struct *fsp)
{
	struct lock_ref_count_key tmp;

	/* Not a bug if it doesn't exist - no locks were ever granted. */

	dbwrap_delete(posix_pending_close_db,
		      locking_ref_count_key_fsp(fsp, &tmp));

	DEBUG(10,("delete_windows_lock_ref_count for file %s\n",
		  fsp_str_dbg(fsp)));
}
/****************************************************************************
 Add an fd to the pending close tdb.
****************************************************************************/

static void add_fd_to_close_entry(files_struct *fsp)
{
	struct db_record *rec;
	int *fds;
	size_t num_fds;
	NTSTATUS status;
	TDB_DATA value;

	rec = dbwrap_fetch_locked(
		posix_pending_close_db, talloc_tos(),
		fd_array_key_fsp(fsp));

	SMB_ASSERT(rec != NULL);

	value = dbwrap_record_get_value(rec);
	SMB_ASSERT((value.dsize % sizeof(int)) == 0);

	num_fds = value.dsize / sizeof(int);
	fds = talloc_array(rec, int, num_fds+1);

	SMB_ASSERT(fds != NULL);

	memcpy(fds, value.dptr, value.dsize);
	fds[num_fds] = fsp->fh->fd;

	status = dbwrap_record_store(
		rec, make_tdb_data((uint8_t *)fds, talloc_get_size(fds)), 0);

	SMB_ASSERT(NT_STATUS_IS_OK(status));

	TALLOC_FREE(rec);

	DEBUG(10,("add_fd_to_close_entry: added fd %d file %s\n",
		  fsp->fh->fd, fsp_str_dbg(fsp)));
}
/****************************************************************************
 Remove all fd entries for a specific dev/inode pair from the tdb.
****************************************************************************/

static void delete_close_entries(files_struct *fsp)
{
	struct db_record *rec;

	rec = dbwrap_fetch_locked(
		posix_pending_close_db, talloc_tos(),
		fd_array_key_fsp(fsp));

	SMB_ASSERT(rec != NULL);
	dbwrap_record_delete(rec);
	TALLOC_FREE(rec);
}

/****************************************************************************
 Get the array of POSIX pending close records for an open fsp. Returns number
 of entries.
****************************************************************************/

static size_t get_posix_pending_close_entries(TALLOC_CTX *mem_ctx,
					      files_struct *fsp, int **entries)
{
	TDB_DATA dbuf;
	NTSTATUS status;

	status = dbwrap_fetch(
		posix_pending_close_db, mem_ctx, fd_array_key_fsp(fsp),
		&dbuf);

	if (NT_STATUS_EQUAL(status, NT_STATUS_NOT_FOUND)) {
		*entries = NULL;
		return 0;
	}

	SMB_ASSERT(NT_STATUS_IS_OK(status));

	if (dbuf.dsize == 0) {
		*entries = NULL;
		return 0;
	}

	*entries = (int *)dbuf.dptr;
	return (size_t)(dbuf.dsize / sizeof(int));
}
/****************************************************************************
 Deal with pending closes needed by POSIX locking support.
 Note that posix_locking_close_file() is expected to have been called
 to delete all locks on this fsp before this function is called.
****************************************************************************/

int fd_close_posix(struct files_struct *fsp)
{
	int saved_errno = 0;
	int ret;
	int *fd_array = NULL;
	size_t count, i;

	if (!lp_locking(fsp->conn->params) ||
	    !lp_posix_locking(fsp->conn->params))
	{
		/*
		 * No locking or POSIX to worry about or we want POSIX semantics
		 * which will lose all locks on all fd's open on this dev/inode,
		 * just close.
		 */
		return close(fsp->fh->fd);
	}

	if (get_windows_lock_ref_count(fsp)) {

		/*
		 * There are outstanding locks on this dev/inode pair on
		 * other fds. Add our fd to the pending close tdb and set
		 * fsp->fh->fd to -1.
		 */

		add_fd_to_close_entry(fsp);
		return 0;
	}

	/*
	 * No outstanding locks. Get the pending close fd's
	 * from the tdb and close them all.
	 */

	count = get_posix_pending_close_entries(talloc_tos(), fsp, &fd_array);

	if (count) {
		DEBUG(10,("fd_close_posix: doing close on %u fd's.\n",
			  (unsigned int)count));

		for(i = 0; i < count; i++) {
			if (close(fd_array[i]) == -1) {
				saved_errno = errno;
			}
		}

		/*
		 * Delete all fd's stored in the tdb
		 * for this dev/inode pair.
		 */

		delete_close_entries(fsp);
	}

	TALLOC_FREE(fd_array);

	/* Don't need a lock ref count on this dev/ino anymore. */
	delete_windows_lock_ref_count(fsp);

	/*
	 * Finally close the fd associated with this fsp.
	 */

	ret = close(fsp->fh->fd);

	if (ret == 0 && saved_errno != 0) {
		errno = saved_errno;
		ret = -1;
	}

	return ret;
}
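/*
 * Example flow through fd_close_posix() (the fd numbers are hypothetical):
 * fsp A (fd 20) and fsp B (fd 25) are open on the same dev/ino and A still
 * holds a Windows byte-range lock. Closing B takes the
 * get_windows_lock_ref_count() branch, so fd 25 is only parked in
 * posix_pending_close_db and 0 is returned. When A is closed later the ref
 * count has dropped to zero, so fd 25 is fetched from the db and closed
 * first, the pending close entries and the ref count record are deleted,
 * and finally fd 20 itself is closed. This works around the POSIX rule that
 * closing any fd drops all fcntl locks the process holds on that dev/ino.
 */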
/****************************************************************************
 Next - the functions that deal with the mapping CIFS Windows locks onto
 the underlying system POSIX locks.
****************************************************************************/

/*
 * Structure used when splitting a lock range
 * into a POSIX lock range. Doubly linked list.
 */

struct lock_list {
	struct lock_list *next;
	struct lock_list *prev;
	off_t start;
	off_t size;
};

/****************************************************************************
 Create a list of lock ranges that don't overlap a given range. Used in calculating
 POSIX locks and unlocks. This is a difficult function that requires ASCII art to
 understand it :-).
****************************************************************************/
static struct lock_list *posix_lock_list(TALLOC_CTX *ctx,
				struct lock_list *lhead,
				const struct lock_context *lock_ctx, /* Lock context lhead belongs to. */
				files_struct *fsp,
				const struct lock_struct *plocks,
				int num_locks)
{
	int i;

	/*
	 * Check the current lock list on this dev/inode pair.
	 * Quit if the list is deleted.
	 */

	DEBUG(10,("posix_lock_list: curr: start=%.0f,size=%.0f\n",
		(double)lhead->start, (double)lhead->size ));

	for (i=0; i<num_locks && lhead; i++) {
		const struct lock_struct *lock = &plocks[i];
		struct lock_list *l_curr;

		/* Ignore all but read/write locks. */
		if (lock->lock_type != READ_LOCK && lock->lock_type != WRITE_LOCK) {
			continue;
		}

		/* Ignore locks not owned by this process. */
		if (!serverid_equal(&lock->context.pid, &lock_ctx->pid)) {
			continue;
		}

		/*
		 * Walk the lock list, checking for overlaps. Note that
		 * the lock list can expand within this loop if the current
		 * range being examined needs to be split.
		 */

		for (l_curr = lhead; l_curr;) {

			DEBUG(10,("posix_lock_list: lock: fnum=%llu: start=%.0f,size=%.0f:type=%s",
				(unsigned long long)lock->fnum,
				(double)lock->start, (double)lock->size, posix_lock_type_name(lock->lock_type) ));

			if ( (l_curr->start >= (lock->start + lock->size)) ||
			     (lock->start >= (l_curr->start + l_curr->size))) {

				/* No overlap with existing lock - leave this range alone. */
/*********************************************
                                        +---------+
                                        | l_curr  |
                                        +---------+
                        +-------+
                        | lock  |
                        +-------+
OR....
        +---------+
        |  l_curr |
        +---------+
**********************************************/

				DEBUG(10,(" no overlap case.\n" ));

				l_curr = l_curr->next;

			} else if ( (l_curr->start >= lock->start) &&
				    (l_curr->start + l_curr->size <= lock->start + lock->size) ) {

				/*
				 * This range is completely overlapped by this existing lock range
				 * and thus should have no effect. Delete it from the list.
				 */
/*********************************************
                +---------+
                | l_curr  |
                +---------+
        +---------------------------+
        |       lock                |
        +---------------------------+
**********************************************/
				/* Save the next pointer */
				struct lock_list *ul_next = l_curr->next;

				DEBUG(10,(" delete case.\n" ));

				DLIST_REMOVE(lhead, l_curr);
				if(lhead == NULL) {
					break; /* No more list... */
				}

				l_curr = ul_next;

			} else if ( (l_curr->start >= lock->start) &&
				    (l_curr->start < lock->start + lock->size) &&
				    (l_curr->start + l_curr->size > lock->start + lock->size) ) {

				/*
				 * This range overlaps the existing lock range at the high end.
				 * Truncate by moving start to existing range end and reducing size.
				 */
/*********************************************
                +---------------+
                |   l_curr      |
                +---------------+
        +---------------+
        |    lock       |
        +---------------+
BECOMES....
                        +-------+
                        | l_curr|
                        +-------+
**********************************************/

				l_curr->size = (l_curr->start + l_curr->size) - (lock->start + lock->size);
				l_curr->start = lock->start + lock->size;

				DEBUG(10,(" truncate high case: start=%.0f,size=%.0f\n",
					(double)l_curr->start, (double)l_curr->size ));

				l_curr = l_curr->next;

			} else if ( (l_curr->start < lock->start) &&
				    (l_curr->start + l_curr->size > lock->start) &&
				    (l_curr->start + l_curr->size <= lock->start + lock->size) ) {

				/*
				 * This range overlaps the existing lock range at the low end.
				 * Truncate by reducing size.
				 */
/*********************************************
   +---------------+
   |   l_curr      |
   +---------------+
           +---------------+
           |    lock       |
           +---------------+
BECOMES....
   +-------+
   | l_curr|
   +-------+
**********************************************/

				l_curr->size = lock->start - l_curr->start;

				DEBUG(10,(" truncate low case: start=%.0f,size=%.0f\n",
					(double)l_curr->start, (double)l_curr->size ));

				l_curr = l_curr->next;

			} else if ( (l_curr->start < lock->start) &&
				    (l_curr->start + l_curr->size > lock->start + lock->size) ) {
				/*
				 * Worst case scenario. Range completely overlaps an existing
				 * lock range. Split the request into two, push the new (upper) request
				 * into the dlink list, and continue with the entry after l_new (as we
				 * know that l_new will not overlap with this lock).
				 */
/*********************************************
        +---------------------------+
        |        l_curr             |
        +---------------------------+
                +---------+
                |  lock   |
                +---------+
BECOMES.....
        +-------+         +---------+
        | l_curr|         |  l_new  |
        +-------+         +---------+
**********************************************/
				struct lock_list *l_new = talloc(ctx, struct lock_list);

				if(l_new == NULL) {
					DEBUG(0,("posix_lock_list: talloc fail.\n"));
					return NULL; /* The talloc_destroy takes care of cleanup. */
				}

				ZERO_STRUCTP(l_new);
				l_new->start = lock->start + lock->size;
				l_new->size = l_curr->start + l_curr->size - l_new->start;

				/* Truncate the l_curr. */
				l_curr->size = lock->start - l_curr->start;

				DEBUG(10,(" split case: curr: start=%.0f,size=%.0f \
new: start=%.0f,size=%.0f\n", (double)l_curr->start, (double)l_curr->size,
					(double)l_new->start, (double)l_new->size ));

				/*
				 * Add into the dlink list after the l_curr point - NOT at lhead.
				 */
				DLIST_ADD_AFTER(lhead, l_new, l_curr);

				/* And move after the link we added. */
				l_curr = l_new->next;

			} else {

				/*
				 * This logic case should never happen. Ensure this is the
				 * case by forcing an abort.... Remove in production.
				 */
				char *msg = NULL;

				if (asprintf(&msg, "logic flaw in cases: l_curr: start = %.0f, size = %.0f : \
lock: start = %.0f, size = %.0f", (double)l_curr->start, (double)l_curr->size, (double)lock->start, (double)lock->size ) != -1) {
					smb_panic(msg);
				} else {
					smb_panic("posix_lock_list");
				}
			}
		} /* end for ( l_curr = lhead; l_curr;) */
	} /* end for (i=0; i<num_locks && lhead; i++) */

	return lhead;
}
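/*
 * Worked example for posix_lock_list(): if lhead is the single range
 * { start=0, size=100 } and this process already holds one other lock
 * { start=10, size=20 } (i.e. bytes 10-29), the "split" case applies and
 * the returned list becomes { start=0, size=10 } -> { start=30, size=70 }.
 * Bytes 10-29 are deliberately left out so the existing POSIX lock survives
 * the subsequent lock/unlock calls.
 */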
/****************************************************************************
 POSIX function to acquire a lock. Returns True if the
 lock could be granted, False if not.
****************************************************************************/

bool set_posix_lock_windows_flavour(files_struct *fsp,
			uint64_t u_offset,
			uint64_t u_count,
			enum brl_type lock_type,
			const struct lock_context *lock_ctx,
			const struct lock_struct *plocks,
			int num_locks,
			int *errno_ret)
{
	off_t offset;
	off_t count;
	int posix_lock_type = map_posix_lock_type(fsp,lock_type);
	bool ret = True;
	size_t lock_count;
	TALLOC_CTX *l_ctx = NULL;
	struct lock_list *llist = NULL;
	struct lock_list *ll = NULL;

	DEBUG(5,("set_posix_lock_windows_flavour: File %s, offset = %.0f, "
		 "count = %.0f, type = %s\n", fsp_str_dbg(fsp),
		 (double)u_offset, (double)u_count,
		 posix_lock_type_name(lock_type)));

	/*
	 * If the requested lock won't fit in the POSIX range, we will
	 * pretend it was successful.
	 */

	if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
		increment_windows_lock_ref_count(fsp);
		return True;
	}

	/*
	 * Windows is very strange. It allows read locks to be overlaid
	 * (even over a write lock), but leaves the write lock in force until the first
	 * unlock. It also reference counts the locks. This means the following sequence :
	 *
	 * process1                                      process2
	 * ------------------------------------------------------------------------
	 * WRITE LOCK : start = 2, len = 10
	 *                                            READ LOCK: start =0, len = 10 - FAIL
	 * READ LOCK : start = 0, len = 14
	 *                                            READ LOCK: start =0, len = 10 - FAIL
	 * UNLOCK : start = 2, len = 10
	 *                                            READ LOCK: start =0, len = 10 - OK
	 *
	 * Under POSIX, the same sequence in steps 1 and 2 would not be reference counted, but
	 * would leave a single read lock over the 0-14 region.
	 */

	if ((l_ctx = talloc_init("set_posix_lock")) == NULL) {
		DEBUG(0,("set_posix_lock_windows_flavour: unable to init talloc context.\n"));
		return False;
	}

	if ((ll = talloc(l_ctx, struct lock_list)) == NULL) {
		DEBUG(0,("set_posix_lock_windows_flavour: unable to talloc unlock list.\n"));
		talloc_destroy(l_ctx);
		return False;
	}

	/*
	 * Create the initial list entry containing the
	 * lock we want to add.
	 */

	ZERO_STRUCTP(ll);
	ll->start = offset;
	ll->size = count;

	DLIST_ADD(llist, ll);

	/*
	 * The following call calculates if there are any
	 * overlapping locks held by this process on
	 * fd's open on the same file and splits this list
	 * into a list of lock ranges that do not overlap with existing
	 * POSIX locks.
	 */

	llist = posix_lock_list(l_ctx,
				llist,
				lock_ctx, /* Lock context llist belongs to. */
				fsp,
				plocks,
				num_locks);

	/*
	 * Add the POSIX locks on the list of ranges returned.
	 * As the lock is supposed to be added atomically, we need to
	 * back out all the locks if any one of these calls fails.
	 */

	for (lock_count = 0, ll = llist; ll; ll = ll->next, lock_count++) {
		offset = ll->start;
		count = ll->size;

		DEBUG(5,("set_posix_lock_windows_flavour: Real lock: Type = %s: offset = %.0f, count = %.0f\n",
			posix_lock_type_name(posix_lock_type), (double)offset, (double)count ));

		if (!posix_fcntl_lock(fsp,F_SETLK,offset,count,posix_lock_type)) {
			*errno_ret = errno;
			DEBUG(5,("set_posix_lock_windows_flavour: Lock fail !: Type = %s: offset = %.0f, count = %.0f. Errno = %s\n",
				posix_lock_type_name(posix_lock_type), (double)offset, (double)count, strerror(errno) ));
			ret = False;
			break;
		}
	}

	if (!ret) {

		/*
		 * Back out all the POSIX locks we have on fail.
		 */

		for (ll = llist; lock_count; ll = ll->next, lock_count--) {
			offset = ll->start;
			count = ll->size;

			DEBUG(5,("set_posix_lock_windows_flavour: Backing out locks: Type = %s: offset = %.0f, count = %.0f\n",
				posix_lock_type_name(posix_lock_type), (double)offset, (double)count ));

			posix_fcntl_lock(fsp,F_SETLK,offset,count,F_UNLCK);
		}
	} else {
		/* Remember the number of Windows locks we have on this dev/ino pair. */
		increment_windows_lock_ref_count(fsp);
	}

	talloc_destroy(l_ctx);
	return ret;
}
/****************************************************************************
 POSIX function to release a lock. Returns True if the
 lock could be released, False if not.
****************************************************************************/

bool release_posix_lock_windows_flavour(files_struct *fsp,
				uint64_t u_offset,
				uint64_t u_count,
				enum brl_type deleted_lock_type,
				const struct lock_context *lock_ctx,
				const struct lock_struct *plocks,
				int num_locks)
{
	off_t offset;
	off_t count;
	bool ret = True;
	TALLOC_CTX *ul_ctx = NULL;
	struct lock_list *ulist = NULL;
	struct lock_list *ul = NULL;

	DEBUG(5,("release_posix_lock_windows_flavour: File %s, offset = %.0f, "
		 "count = %.0f\n", fsp_str_dbg(fsp),
		 (double)u_offset, (double)u_count));

	/* Remember the number of Windows locks we have on this dev/ino pair. */
	decrement_windows_lock_ref_count(fsp);

	/*
	 * If the requested lock won't fit in the POSIX range, we will
	 * pretend it was successful.
	 */

	if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
		return True;
	}

	if ((ul_ctx = talloc_init("release_posix_lock")) == NULL) {
		DEBUG(0,("release_posix_lock_windows_flavour: unable to init talloc context.\n"));
		return False;
	}

	if ((ul = talloc(ul_ctx, struct lock_list)) == NULL) {
		DEBUG(0,("release_posix_lock_windows_flavour: unable to talloc unlock list.\n"));
		talloc_destroy(ul_ctx);
		return False;
	}

	/*
	 * Create the initial list entry containing the
	 * lock we want to remove.
	 */

	ZERO_STRUCTP(ul);
	ul->start = offset;
	ul->size = count;

	DLIST_ADD(ulist, ul);

	/*
	 * The following call calculates if there are any
	 * overlapping locks held by this process on
	 * fd's open on the same file and creates a
	 * list of unlock ranges that will allow
	 * POSIX lock ranges to remain on the file whilst the
	 * unlocks are performed.
	 */

	ulist = posix_lock_list(ul_ctx,
				ulist,
				lock_ctx, /* Lock context ulist belongs to. */
				fsp,
				plocks,
				num_locks);

	/*
	 * If there were any overlapped entries (list is > 1 or size or start have changed),
	 * and the lock_type we just deleted from
	 * the upper layer tdb was a write lock, then before doing the unlock we need to downgrade
	 * the POSIX lock to a read lock. This allows any overlapping read locks
	 * to be atomically maintained.
	 */
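	/*
	 * Example (mirroring the sequence described in
	 * set_posix_lock_windows_flavour above): this process took a Windows
	 * write lock at start=2,len=10 and then a read lock at start=0,len=14
	 * on the same fsp. When the write lock is released, posix_lock_list()
	 * strips the whole 2-11 range from ulist because the remaining 0-13
	 * read lock covers it, and the downgrade below re-locks 2-11 with
	 * F_RDLCK so the surviving Windows read lock keeps its POSIX backing.
	 */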
	if (deleted_lock_type == WRITE_LOCK &&
	    (!ulist || ulist->next != NULL || ulist->start != offset || ulist->size != count)) {

		DEBUG(5,("release_posix_lock_windows_flavour: downgrading lock to READ: offset = %.0f, count = %.0f\n",
			(double)offset, (double)count ));

		if (!posix_fcntl_lock(fsp,F_SETLK,offset,count,F_RDLCK)) {
			DEBUG(0,("release_posix_lock_windows_flavour: downgrade of lock failed with error %s !\n", strerror(errno) ));
			talloc_destroy(ul_ctx);
			return False;
		}
	}

	/*
	 * Release the POSIX locks on the list of ranges returned.
	 */

	for(; ulist; ulist = ulist->next) {
		offset = ulist->start;
		count = ulist->size;

		DEBUG(5,("release_posix_lock_windows_flavour: Real unlock: offset = %.0f, count = %.0f\n",
			(double)offset, (double)count ));

		if (!posix_fcntl_lock(fsp,F_SETLK,offset,count,F_UNLCK)) {
			ret = False;
		}
	}

	talloc_destroy(ul_ctx);
	return ret;
}
/****************************************************************************
 Next - the functions that deal with mapping CIFS POSIX locks onto
 the underlying system POSIX locks.
****************************************************************************/

/****************************************************************************
 POSIX function to acquire a lock. Returns True if the
 lock could be granted, False if not.
 As POSIX locks don't stack or conflict (they just overwrite)
 we can map the requested lock directly onto a system one. We
 know it doesn't conflict with locks on other contexts as the
 upper layer would have refused it.
****************************************************************************/

bool set_posix_lock_posix_flavour(files_struct *fsp,
			uint64_t u_offset,
			uint64_t u_count,
			enum brl_type lock_type,
			int *errno_ret)
{
	off_t offset;
	off_t count;
	int posix_lock_type = map_posix_lock_type(fsp,lock_type);

	DEBUG(5,("set_posix_lock_posix_flavour: File %s, offset = %ju, count "
		 "= %ju, type = %s\n", fsp_str_dbg(fsp),
		 (uintmax_t)u_offset, (uintmax_t)u_count,
		 posix_lock_type_name(lock_type)));

	/*
	 * If the requested lock won't fit in the POSIX range, we will
	 * pretend it was successful.
	 */

	if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
		return True;
	}

	if (!posix_fcntl_lock(fsp,F_SETLK,offset,count,posix_lock_type)) {
		*errno_ret = errno;
		DEBUG(5,("set_posix_lock_posix_flavour: Lock fail !: Type = %s: offset = %ju, count = %ju. Errno = %s\n",
			posix_lock_type_name(posix_lock_type), (uintmax_t)offset, (uintmax_t)count, strerror(errno) ));
		return False;
	}
	return True;
}
/****************************************************************************
 POSIX function to release a lock. Returns True if the
 lock could be released, False if not.
 We are given a complete lock state from the upper layer which is what the lock
 state should be after the unlock has already been done, so what
 we do is punch out holes in the unlock range where locks owned by this process
 have a different lock context.
****************************************************************************/
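/*
 * Example: unlocking the range start=0,size=100 while this process still
 * holds a lock at start=40,size=10 (for instance on another open handle)
 * leaves ulist as { start=0, size=40 } -> { start=50, size=50 }, so the
 * fcntl unlocks below never touch bytes 40-49 and that lock stays in place.
 */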
bool release_posix_lock_posix_flavour(files_struct *fsp,
				uint64_t u_offset,
				uint64_t u_count,
				const struct lock_context *lock_ctx,
				const struct lock_struct *plocks,
				int num_locks)
{
	bool ret = True;
	off_t offset;
	off_t count;
	TALLOC_CTX *ul_ctx = NULL;
	struct lock_list *ulist = NULL;
	struct lock_list *ul = NULL;

	DEBUG(5,("release_posix_lock_posix_flavour: File %s, offset = %.0f, "
		 "count = %.0f\n", fsp_str_dbg(fsp),
		 (double)u_offset, (double)u_count));

	/*
	 * If the requested lock won't fit in the POSIX range, we will
	 * pretend it was successful.
	 */

	if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
		return True;
	}

	if ((ul_ctx = talloc_init("release_posix_lock")) == NULL) {
		DEBUG(0,("release_posix_lock_posix_flavour: unable to init talloc context.\n"));
		return False;
	}

	if ((ul = talloc(ul_ctx, struct lock_list)) == NULL) {
		DEBUG(0,("release_posix_lock_posix_flavour: unable to talloc unlock list.\n"));
		talloc_destroy(ul_ctx);
		return False;
	}

	/*
	 * Create the initial list entry containing the
	 * lock we want to remove.
	 */

	ZERO_STRUCTP(ul);
	ul->start = offset;
	ul->size = count;

	DLIST_ADD(ulist, ul);

	/*
	 * Walk the given array creating a linked list
	 * of unlock requests.
	 */

	ulist = posix_lock_list(ul_ctx,
				ulist,
				lock_ctx, /* Lock context ulist belongs to. */
				fsp,
				plocks,
				num_locks);

	/*
	 * Release the POSIX locks on the list of ranges returned.
	 */

	for(; ulist; ulist = ulist->next) {
		offset = ulist->start;
		count = ulist->size;

		DEBUG(5,("release_posix_lock_posix_flavour: Real unlock: offset = %.0f, count = %.0f\n",
			(double)offset, (double)count ));

		if (!posix_fcntl_lock(fsp,F_SETLK,offset,count,F_UNLCK)) {
			ret = False;
		}
	}

	talloc_destroy(ul_ctx);
	return ret;
}