ctdb-recoverd: Re-check master on failure to take recovery lock
[Samba.git] / source3 / locking / posix.c
blob79c33cfb0faef34bae1caf5a6d007aff61603f81
1 /*
2 Unix SMB/CIFS implementation.
3 Locking functions
4 Copyright (C) Jeremy Allison 1992-2006
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <http://www.gnu.org/licenses/>.
19 Revision History:
21 POSIX locking support. Jeremy Allison (jeremy@valinux.com), Apr. 2000.
24 #include "includes.h"
25 #include "system/filesys.h"
26 #include "lib/util/server_id.h"
27 #include "locking/proto.h"
28 #include "dbwrap/dbwrap.h"
29 #include "dbwrap/dbwrap_rbt.h"
30 #include "util_tdb.h"
32 #undef DBGC_CLASS
33 #define DBGC_CLASS DBGC_LOCKING
/*
 * The pending close database handle.
 *
 * File-scope handle, NULL until posix_locking_init() opens it with
 * db_open_rbt() and released again by posix_locking_end(). Within this
 * file it stores two kinds of record: per-file lock reference counts
 * (keyed by struct lock_ref_count_key) and per-file arrays of fds whose
 * close has been deferred (keyed by the bare file id).
 */

static struct db_context *posix_pending_close_db;
41 /****************************************************************************
42 First - the functions that deal with the underlying system locks - these
43 functions are used no matter if we're mapping CIFS Windows locks or CIFS
44 POSIX locks onto POSIX.
45 ****************************************************************************/
47 /****************************************************************************
48 Utility function to map a lock type correctly depending on the open
49 mode of a file.
50 ****************************************************************************/
52 static int map_posix_lock_type( files_struct *fsp, enum brl_type lock_type)
54 if((lock_type == WRITE_LOCK) && !fsp->can_write) {
56 * Many UNIX's cannot get a write lock on a file opened read-only.
57 * Win32 locking semantics allow this.
58 * Do the best we can and attempt a read-only lock.
60 DEBUG(10,("map_posix_lock_type: Downgrading write lock to read due to read-only file.\n"));
61 return F_RDLCK;
65 * This return should be the most normal, as we attempt
66 * to always open files read/write.
69 return (lock_type == READ_LOCK) ? F_RDLCK : F_WRLCK;
/****************************************************************************
 Debugging aid: printable name for an fcntl() lock type.

 Returns "READ" for F_RDLCK and "WRITE" for any other value. The argument
 must be an fcntl() lock type, not an enum brl_type.
****************************************************************************/

static const char *posix_lock_type_name(int lock_type)
{
	if (lock_type == F_RDLCK) {
		return "READ";
	}
	return "WRITE";
}
81 /****************************************************************************
82 Check to see if the given unsigned lock range is within the possible POSIX
83 range. Modifies the given args to be in range if possible, just returns
84 False if not.
85 ****************************************************************************/
87 static bool posix_lock_in_range(off_t *offset_out, off_t *count_out,
88 uint64_t u_offset, uint64_t u_count)
90 off_t offset = (off_t)u_offset;
91 off_t count = (off_t)u_count;
94 * For the type of system we are, attempt to
95 * find the maximum positive lock offset as an off_t.
98 #if defined(MAX_POSITIVE_LOCK_OFFSET) /* Some systems have arbitrary limits. */
100 off_t max_positive_lock_offset = (MAX_POSITIVE_LOCK_OFFSET);
101 #else
103 * In this case off_t is 64 bits,
104 * and the underlying system can handle 64 bit signed locks.
107 off_t mask2 = ((off_t)0x4) << (SMB_OFF_T_BITS-4);
108 off_t mask = (mask2<<1);
109 off_t max_positive_lock_offset = ~mask;
111 #endif
113 * POSIX locks of length zero mean lock to end-of-file.
114 * Win32 locks of length zero are point probes. Ignore
115 * any Win32 locks of length zero. JRA.
118 if (count == 0) {
119 DEBUG(10,("posix_lock_in_range: count = 0, ignoring.\n"));
120 return False;
124 * If the given offset was > max_positive_lock_offset then we cannot map this at all
125 * ignore this lock.
128 if (u_offset & ~((uint64_t)max_positive_lock_offset)) {
129 DEBUG(10, ("posix_lock_in_range: (offset = %ju) offset > %ju "
130 "and we cannot handle this. Ignoring lock.\n",
131 (uintmax_t)u_offset,
132 (uintmax_t)max_positive_lock_offset));
133 return False;
137 * We must truncate the count to less than max_positive_lock_offset.
140 if (u_count & ~((uint64_t)max_positive_lock_offset)) {
141 count = max_positive_lock_offset;
145 * Truncate count to end at max lock offset.
148 if (offset > INT64_MAX - count ||
149 offset + count > max_positive_lock_offset) {
150 count = max_positive_lock_offset - offset;
154 * If we ate all the count, ignore this lock.
157 if (count == 0) {
158 DEBUG(10, ("posix_lock_in_range: Count = 0. Ignoring lock "
159 "u_offset = %ju, u_count = %ju\n",
160 (uintmax_t)u_offset,
161 (uintmax_t)u_count));
162 return False;
166 * The mapping was successful.
169 DEBUG(10, ("posix_lock_in_range: offset_out = %ju, "
170 "count_out = %ju\n",
171 (uintmax_t)offset, (uintmax_t)count));
173 *offset_out = offset;
174 *count_out = count;
176 return True;
/*
 * VFS entry point for the byte-range "lock" operation: forwards
 * op/offset/count/type for fsp to the lock_fn of a VFS handle and returns
 * whatever that implementation returns.
 */
bool smb_vfs_call_lock(struct vfs_handle_struct *handle,
		       struct files_struct *fsp, int op, off_t offset,
		       off_t count, int type)
{
	/*
	 * NOTE(review): VFS_FIND is defined outside this file; presumably
	 * it advances "handle" to the next module that implements lock_fn.
	 * Confirm against the VFS headers.
	 */
	VFS_FIND(lock);
	return handle->fns->lock_fn(handle, fsp, op, offset, count, type);
}
187 /****************************************************************************
188 Actual function that does POSIX locks. Copes with 64 -> 32 bit cruft and
189 broken NFS implementations.
190 ****************************************************************************/
192 static bool posix_fcntl_lock(files_struct *fsp, int op, off_t offset, off_t count, int type)
194 bool ret;
196 DEBUG(8,("posix_fcntl_lock %d %d %jd %jd %d\n",
197 fsp->fh->fd,op,(intmax_t)offset,(intmax_t)count,type));
199 ret = SMB_VFS_LOCK(fsp, op, offset, count, type);
201 if (!ret && ((errno == EFBIG) || (errno == ENOLCK) || (errno == EINVAL))) {
203 if ((errno == EINVAL) &&
204 (op != F_GETLK &&
205 op != F_SETLK &&
206 op != F_SETLKW)) {
207 DEBUG(0,("WARNING: OFD locks in use and no kernel "
208 "support. Try setting "
209 "'smbd:force process locks = true' "
210 "in smb.conf\n"));
211 } else {
212 DEBUG(0, ("WARNING: lock request at offset "
213 "%ju, length %ju returned\n",
214 (uintmax_t)offset, (uintmax_t)count));
215 DEBUGADD(0, ("an %s error. This can happen when using 64 bit "
216 "lock offsets\n", strerror(errno)));
217 DEBUGADD(0, ("on 32 bit NFS mounted file systems.\n"));
221 * If the offset is > 0x7FFFFFFF then this will cause problems on
222 * 32 bit NFS mounted filesystems. Just ignore it.
225 if (offset & ~((off_t)0x7fffffff)) {
226 DEBUG(0,("Offset greater than 31 bits. Returning success.\n"));
227 return True;
230 if (count & ~((off_t)0x7fffffff)) {
231 /* 32 bit NFS file system, retry with smaller offset */
232 DEBUG(0,("Count greater than 31 bits - retrying with 31 bit truncated length.\n"));
233 errno = 0;
234 count &= 0x7fffffff;
235 ret = SMB_VFS_LOCK(fsp, op, offset, count, type);
239 DEBUG(8,("posix_fcntl_lock: Lock call %s\n", ret ? "successful" : "failed"));
240 return ret;
/*
 * VFS entry point for the "getlock" (lock query) operation: forwards the
 * in/out range, type and pid pointers for fsp to the getlock_fn of a VFS
 * handle and returns whatever that implementation returns.
 */
bool smb_vfs_call_getlock(struct vfs_handle_struct *handle,
			  struct files_struct *fsp, off_t *poffset,
			  off_t *pcount, int *ptype, pid_t *ppid)
{
	/*
	 * NOTE(review): VFS_FIND is defined outside this file; presumably
	 * it advances "handle" to the next module that implements
	 * getlock_fn. Confirm against the VFS headers.
	 */
	VFS_FIND(getlock);
	return handle->fns->getlock_fn(handle, fsp, poffset, pcount, ptype,
				       ppid);
}
252 /****************************************************************************
253 Actual function that gets POSIX locks. Copes with 64 -> 32 bit cruft and
254 broken NFS implementations.
255 ****************************************************************************/
257 static bool posix_fcntl_getlock(files_struct *fsp, off_t *poffset, off_t *pcount, int *ptype)
259 pid_t pid;
260 bool ret;
262 DEBUG(8, ("posix_fcntl_getlock %d %ju %ju %d\n",
263 fsp->fh->fd, (uintmax_t)*poffset, (uintmax_t)*pcount,
264 *ptype));
266 ret = SMB_VFS_GETLOCK(fsp, poffset, pcount, ptype, &pid);
268 if (!ret && ((errno == EFBIG) || (errno == ENOLCK) || (errno == EINVAL))) {
270 DEBUG(0, ("posix_fcntl_getlock: WARNING: lock request at "
271 "offset %ju, length %ju returned\n",
272 (uintmax_t)*poffset, (uintmax_t)*pcount));
273 DEBUGADD(0, ("an %s error. This can happen when using 64 bit "
274 "lock offsets\n", strerror(errno)));
275 DEBUGADD(0, ("on 32 bit NFS mounted file systems.\n"));
278 * If the offset is > 0x7FFFFFFF then this will cause problems on
279 * 32 bit NFS mounted filesystems. Just ignore it.
282 if (*poffset & ~((off_t)0x7fffffff)) {
283 DEBUG(0,("Offset greater than 31 bits. Returning success.\n"));
284 return True;
287 if (*pcount & ~((off_t)0x7fffffff)) {
288 /* 32 bit NFS file system, retry with smaller offset */
289 DEBUG(0,("Count greater than 31 bits - retrying with 31 bit truncated length.\n"));
290 errno = 0;
291 *pcount &= 0x7fffffff;
292 ret = SMB_VFS_GETLOCK(fsp,poffset,pcount,ptype,&pid);
296 DEBUG(8,("posix_fcntl_getlock: Lock query call %s\n", ret ? "successful" : "failed"));
297 return ret;
300 /****************************************************************************
301 POSIX function to see if a file region is locked. Returns True if the
302 region is locked, False otherwise.
303 ****************************************************************************/
305 bool is_posix_locked(files_struct *fsp,
306 uint64_t *pu_offset,
307 uint64_t *pu_count,
308 enum brl_type *plock_type,
309 enum brl_flavour lock_flav)
311 off_t offset;
312 off_t count;
313 int posix_lock_type = map_posix_lock_type(fsp,*plock_type);
315 DEBUG(10, ("is_posix_locked: File %s, offset = %ju, count = %ju, "
316 "type = %s\n", fsp_str_dbg(fsp), (uintmax_t)*pu_offset,
317 (uintmax_t)*pu_count, posix_lock_type_name(*plock_type)));
320 * If the requested lock won't fit in the POSIX range, we will
321 * never set it, so presume it is not locked.
324 if(!posix_lock_in_range(&offset, &count, *pu_offset, *pu_count)) {
325 return False;
328 if (!posix_fcntl_getlock(fsp,&offset,&count,&posix_lock_type)) {
329 return False;
332 if (posix_lock_type == F_UNLCK) {
333 return False;
336 if (lock_flav == POSIX_LOCK) {
337 /* Only POSIX lock queries need to know the details. */
338 *pu_offset = (uint64_t)offset;
339 *pu_count = (uint64_t)count;
340 *plock_type = (posix_lock_type == F_RDLCK) ? READ_LOCK : WRITE_LOCK;
342 return True;
345 /****************************************************************************
346 Next - the functions that deal with in memory database storing representations
347 of either Windows CIFS locks or POSIX CIFS locks.
348 ****************************************************************************/
/*
 * The key used for lock reference count records in the in-memory POSIX
 * database: the file id followed by a constant tag byte.
 *
 * NOTE(review): the tag presumably exists so that this key can never
 * equal the fd array key, which is the bare file id - confirm.
 */

struct lock_ref_count_key {
	struct file_id id;	/* copied from fsp->file_id */
	char r;			/* always 'r', see locking_ref_count_key_fsp() */
};
357 /*******************************************************************
358 Form a static locking key for a dev/inode pair for the lock ref count
359 ******************************************************************/
361 static TDB_DATA locking_ref_count_key_fsp(const files_struct *fsp,
362 struct lock_ref_count_key *tmp)
364 ZERO_STRUCTP(tmp);
365 tmp->id = fsp->file_id;
366 tmp->r = 'r';
367 return make_tdb_data((uint8_t *)tmp, sizeof(*tmp));
370 /*******************************************************************
371 Convenience function to get an fd_array key from an fsp.
372 ******************************************************************/
374 static TDB_DATA fd_array_key_fsp(const files_struct *fsp)
376 return make_tdb_data((const uint8_t *)&fsp->file_id, sizeof(fsp->file_id));
379 /*******************************************************************
380 Create the in-memory POSIX lock databases.
381 ********************************************************************/
383 bool posix_locking_init(bool read_only)
385 if (posix_pending_close_db != NULL) {
386 return true;
389 posix_pending_close_db = db_open_rbt(NULL);
391 if (posix_pending_close_db == NULL) {
392 DEBUG(0,("Failed to open POSIX pending close database.\n"));
393 return false;
396 return true;
/*******************************************************************
 Delete the in-memory POSIX lock databases.

 Releases the pending close database handle. File descriptors still
 recorded in it are not closed here. Always returns true.
********************************************************************/

bool posix_locking_end(void)
{
	/*
	 * Shouldn't we close all fd's here?
	 */
	TALLOC_FREE(posix_pending_close_db);
	return true;
}
412 /****************************************************************************
413 Next - the functions that deal with reference count of number of locks open
414 on a dev/ino pair.
415 ****************************************************************************/
417 /****************************************************************************
418 Increase the lock ref count. Creates lock_ref_count entry if it doesn't exist.
419 ****************************************************************************/
421 static void increment_lock_ref_count(const files_struct *fsp)
423 struct lock_ref_count_key tmp;
424 int32_t lock_ref_count = 0;
425 NTSTATUS status;
427 status = dbwrap_change_int32_atomic(
428 posix_pending_close_db, locking_ref_count_key_fsp(fsp, &tmp),
429 &lock_ref_count, 1);
431 SMB_ASSERT(NT_STATUS_IS_OK(status));
432 SMB_ASSERT(lock_ref_count < INT32_MAX);
434 DEBUG(10,("lock_ref_count for file %s = %d\n",
435 fsp_str_dbg(fsp), (int)lock_ref_count));
438 /****************************************************************************
439 Reduce the lock ref count.
440 ****************************************************************************/
442 static void decrement_lock_ref_count(const files_struct *fsp)
444 struct lock_ref_count_key tmp;
445 int32_t lock_ref_count = 0;
446 NTSTATUS status;
448 status = dbwrap_change_int32_atomic(
449 posix_pending_close_db, locking_ref_count_key_fsp(fsp, &tmp),
450 &lock_ref_count, -1);
452 SMB_ASSERT(NT_STATUS_IS_OK(status));
453 SMB_ASSERT(lock_ref_count > 0);
455 DEBUG(10,("lock_ref_count for file %s = %d\n",
456 fsp_str_dbg(fsp), (int)lock_ref_count));
459 /****************************************************************************
460 Fetch the lock ref count.
461 ****************************************************************************/
463 static int32_t get_lock_ref_count(const files_struct *fsp)
465 struct lock_ref_count_key tmp;
466 NTSTATUS status;
467 int32_t lock_ref_count = 0;
469 status = dbwrap_fetch_int32(
470 posix_pending_close_db, locking_ref_count_key_fsp(fsp, &tmp),
471 &lock_ref_count);
473 if (!NT_STATUS_IS_OK(status) &&
474 !NT_STATUS_EQUAL(status, NT_STATUS_NOT_FOUND)) {
475 DEBUG(0, ("Error fetching "
476 "lock ref count for file %s: %s\n",
477 fsp_str_dbg(fsp), nt_errstr(status)));
479 return lock_ref_count;
482 /****************************************************************************
483 Delete a lock_ref_count entry.
484 ****************************************************************************/
486 static void delete_lock_ref_count(const files_struct *fsp)
488 struct lock_ref_count_key tmp;
490 /* Not a bug if it doesn't exist - no locks were ever granted. */
492 dbwrap_delete(posix_pending_close_db,
493 locking_ref_count_key_fsp(fsp, &tmp));
495 DEBUG(10,("delete_lock_ref_count for file %s\n",
496 fsp_str_dbg(fsp)));
499 /****************************************************************************
500 Next - the functions that deal with storing fd's that have outstanding
501 POSIX locks when closed.
502 ****************************************************************************/
504 /****************************************************************************
505 The records in posix_pending_close_db are composed of an array of
506 ints keyed by dev/ino pair. Those ints are the fd's that were open on
507 this dev/ino pair that should have been closed, but can't as the lock
508 ref count is non zero.
509 ****************************************************************************/
511 /****************************************************************************
512 Add an fd to the pending close tdb.
513 ****************************************************************************/
515 static void add_fd_to_close_entry(const files_struct *fsp)
517 struct db_record *rec;
518 int *fds;
519 size_t num_fds;
520 NTSTATUS status;
521 TDB_DATA value;
523 rec = dbwrap_fetch_locked(
524 posix_pending_close_db, talloc_tos(),
525 fd_array_key_fsp(fsp));
527 SMB_ASSERT(rec != NULL);
529 value = dbwrap_record_get_value(rec);
530 SMB_ASSERT((value.dsize % sizeof(int)) == 0);
532 num_fds = value.dsize / sizeof(int);
533 fds = talloc_array(rec, int, num_fds+1);
535 SMB_ASSERT(fds != NULL);
537 memcpy(fds, value.dptr, value.dsize);
538 fds[num_fds] = fsp->fh->fd;
540 status = dbwrap_record_store(
541 rec, make_tdb_data((uint8_t *)fds, talloc_get_size(fds)), 0);
543 SMB_ASSERT(NT_STATUS_IS_OK(status));
545 TALLOC_FREE(rec);
547 DEBUG(10,("add_fd_to_close_entry: added fd %d file %s\n",
548 fsp->fh->fd, fsp_str_dbg(fsp)));
551 /****************************************************************************
552 Remove all fd entries for a specific dev/inode pair from the tdb.
553 ****************************************************************************/
555 static void delete_close_entries(const files_struct *fsp)
557 struct db_record *rec;
559 rec = dbwrap_fetch_locked(
560 posix_pending_close_db, talloc_tos(),
561 fd_array_key_fsp(fsp));
563 SMB_ASSERT(rec != NULL);
564 dbwrap_record_delete(rec);
565 TALLOC_FREE(rec);
568 /****************************************************************************
569 Get the array of POSIX pending close records for an open fsp. Returns number
570 of entries.
571 ****************************************************************************/
573 static size_t get_posix_pending_close_entries(TALLOC_CTX *mem_ctx,
574 const files_struct *fsp,
575 int **entries)
577 TDB_DATA dbuf;
578 NTSTATUS status;
580 status = dbwrap_fetch(
581 posix_pending_close_db, mem_ctx, fd_array_key_fsp(fsp),
582 &dbuf);
584 if (NT_STATUS_EQUAL(status, NT_STATUS_NOT_FOUND)) {
585 *entries = NULL;
586 return 0;
589 SMB_ASSERT(NT_STATUS_IS_OK(status));
591 if (dbuf.dsize == 0) {
592 *entries = NULL;
593 return 0;
596 *entries = (int *)dbuf.dptr;
597 return (size_t)(dbuf.dsize / sizeof(int));
600 /****************************************************************************
601 Deal with pending closes needed by POSIX locking support.
602 Note that posix_locking_close_file() is expected to have been called
603 to delete all locks on this fsp before this function is called.
604 ****************************************************************************/
606 int fd_close_posix(const struct files_struct *fsp)
608 int saved_errno = 0;
609 int ret;
610 int *fd_array = NULL;
611 size_t count, i;
613 if (!lp_locking(fsp->conn->params) ||
614 !lp_posix_locking(fsp->conn->params) ||
615 fsp->use_ofd_locks)
618 * No locking or POSIX to worry about or we are using POSIX
619 * open file description lock semantics which only removes
620 * locks on the file descriptor we're closing. Just close.
622 return close(fsp->fh->fd);
625 if (get_lock_ref_count(fsp)) {
628 * There are outstanding locks on this dev/inode pair on
629 * other fds. Add our fd to the pending close db. We also
630 * set fsp->fh->fd to -1 inside fd_close() after returning
631 * from VFS layer.
634 add_fd_to_close_entry(fsp);
635 return 0;
639 * No outstanding locks. Get the pending close fd's
640 * from the tdb and close them all.
643 count = get_posix_pending_close_entries(talloc_tos(), fsp, &fd_array);
645 if (count) {
646 DEBUG(10,("fd_close_posix: doing close on %u fd's.\n",
647 (unsigned int)count));
649 for(i = 0; i < count; i++) {
650 if (close(fd_array[i]) == -1) {
651 saved_errno = errno;
656 * Delete all fd's stored in the tdb
657 * for this dev/inode pair.
660 delete_close_entries(fsp);
663 TALLOC_FREE(fd_array);
665 /* Don't need a lock ref count on this dev/ino anymore. */
666 delete_lock_ref_count(fsp);
669 * Finally close the fd associated with this fsp.
672 ret = close(fsp->fh->fd);
674 if (ret == 0 && saved_errno != 0) {
675 errno = saved_errno;
676 ret = -1;
679 return ret;
682 /****************************************************************************
683 Next - the functions that deal with the mapping CIFS Windows locks onto
684 the underlying system POSIX locks.
685 ****************************************************************************/
/*
 * Structure used when splitting a lock range
 * into a POSIX lock range. Doubly linked list.
 *
 * Each element describes one byte range [start, start + size) that
 * still has to be locked or unlocked at the POSIX level.
 */

struct lock_list {
	struct lock_list *next;	/* list linkage, used with the DLIST_* macros */
	struct lock_list *prev;
	off_t start;		/* first byte of the range */
	off_t size;		/* length of the range in bytes */
};
699 /****************************************************************************
700 Create a list of lock ranges that don't overlap a given range. Used in calculating
701 POSIX locks and unlocks. This is a difficult function that requires ASCII art to
702 understand it :-).
703 ****************************************************************************/
705 static struct lock_list *posix_lock_list(TALLOC_CTX *ctx,
706 struct lock_list *lhead,
707 const struct lock_context *lock_ctx, /* Lock context lhead belongs to. */
708 const struct lock_struct *plocks,
709 int num_locks)
711 int i;
714 * Check the current lock list on this dev/inode pair.
715 * Quit if the list is deleted.
718 DEBUG(10, ("posix_lock_list: curr: start=%ju,size=%ju\n",
719 (uintmax_t)lhead->start, (uintmax_t)lhead->size ));
721 for (i=0; i<num_locks && lhead; i++) {
722 const struct lock_struct *lock = &plocks[i];
723 struct lock_list *l_curr;
725 /* Ignore all but read/write locks. */
726 if (lock->lock_type != READ_LOCK && lock->lock_type != WRITE_LOCK) {
727 continue;
730 /* Ignore locks not owned by this process. */
731 if (!serverid_equal(&lock->context.pid, &lock_ctx->pid)) {
732 continue;
736 * Walk the lock list, checking for overlaps. Note that
737 * the lock list can expand within this loop if the current
738 * range being examined needs to be split.
741 for (l_curr = lhead; l_curr;) {
743 DEBUG(10, ("posix_lock_list: lock: fnum=%ju: "
744 "start=%ju,size=%ju:type=%s",
745 (uintmax_t)lock->fnum,
746 (uintmax_t)lock->start,
747 (uintmax_t)lock->size,
748 posix_lock_type_name(lock->lock_type) ));
750 if ( (l_curr->start >= (lock->start + lock->size)) ||
751 (lock->start >= (l_curr->start + l_curr->size))) {
753 /* No overlap with existing lock - leave this range alone. */
754 /*********************************************
755 +---------+
756 | l_curr |
757 +---------+
758 +-------+
759 | lock |
760 +-------+
761 OR....
762 +---------+
763 | l_curr |
764 +---------+
765 **********************************************/
767 DEBUG(10,(" no overlap case.\n" ));
769 l_curr = l_curr->next;
771 } else if ( (l_curr->start >= lock->start) &&
772 (l_curr->start + l_curr->size <= lock->start + lock->size) ) {
775 * This range is completely overlapped by this existing lock range
776 * and thus should have no effect. Delete it from the list.
778 /*********************************************
779 +---------+
780 | l_curr |
781 +---------+
782 +---------------------------+
783 | lock |
784 +---------------------------+
785 **********************************************/
786 /* Save the next pointer */
787 struct lock_list *ul_next = l_curr->next;
789 DEBUG(10,(" delete case.\n" ));
791 DLIST_REMOVE(lhead, l_curr);
792 if(lhead == NULL) {
793 break; /* No more list... */
796 l_curr = ul_next;
798 } else if ( (l_curr->start >= lock->start) &&
799 (l_curr->start < lock->start + lock->size) &&
800 (l_curr->start + l_curr->size > lock->start + lock->size) ) {
803 * This range overlaps the existing lock range at the high end.
804 * Truncate by moving start to existing range end and reducing size.
806 /*********************************************
807 +---------------+
808 | l_curr |
809 +---------------+
810 +---------------+
811 | lock |
812 +---------------+
813 BECOMES....
814 +-------+
815 | l_curr|
816 +-------+
817 **********************************************/
819 l_curr->size = (l_curr->start + l_curr->size) - (lock->start + lock->size);
820 l_curr->start = lock->start + lock->size;
822 DEBUG(10, (" truncate high case: start=%ju,"
823 "size=%ju\n",
824 (uintmax_t)l_curr->start,
825 (uintmax_t)l_curr->size ));
827 l_curr = l_curr->next;
829 } else if ( (l_curr->start < lock->start) &&
830 (l_curr->start + l_curr->size > lock->start) &&
831 (l_curr->start + l_curr->size <= lock->start + lock->size) ) {
834 * This range overlaps the existing lock range at the low end.
835 * Truncate by reducing size.
837 /*********************************************
838 +---------------+
839 | l_curr |
840 +---------------+
841 +---------------+
842 | lock |
843 +---------------+
844 BECOMES....
845 +-------+
846 | l_curr|
847 +-------+
848 **********************************************/
850 l_curr->size = lock->start - l_curr->start;
852 DEBUG(10, (" truncate low case: start=%ju,"
853 "size=%ju\n",
854 (uintmax_t)l_curr->start,
855 (uintmax_t)l_curr->size ));
857 l_curr = l_curr->next;
859 } else if ( (l_curr->start < lock->start) &&
860 (l_curr->start + l_curr->size > lock->start + lock->size) ) {
862 * Worst case scenario. Range completely overlaps an existing
863 * lock range. Split the request into two, push the new (upper) request
864 * into the dlink list, and continue with the entry after l_new (as we
865 * know that l_new will not overlap with this lock).
867 /*********************************************
868 +---------------------------+
869 | l_curr |
870 +---------------------------+
871 +---------+
872 | lock |
873 +---------+
874 BECOMES.....
875 +-------+ +---------+
876 | l_curr| | l_new |
877 +-------+ +---------+
878 **********************************************/
879 struct lock_list *l_new = talloc(ctx, struct lock_list);
881 if(l_new == NULL) {
882 DEBUG(0,("posix_lock_list: talloc fail.\n"));
883 return NULL; /* The talloc_destroy takes care of cleanup. */
886 ZERO_STRUCTP(l_new);
887 l_new->start = lock->start + lock->size;
888 l_new->size = l_curr->start + l_curr->size - l_new->start;
890 /* Truncate the l_curr. */
891 l_curr->size = lock->start - l_curr->start;
893 DEBUG(10, (" split case: curr: start=%ju,"
894 "size=%ju new: start=%ju,"
895 "size=%ju\n",
896 (uintmax_t)l_curr->start,
897 (uintmax_t)l_curr->size,
898 (uintmax_t)l_new->start,
899 (uintmax_t)l_new->size ));
902 * Add into the dlink list after the l_curr point - NOT at lhead.
904 DLIST_ADD_AFTER(lhead, l_new, l_curr);
906 /* And move after the link we added. */
907 l_curr = l_new->next;
909 } else {
912 * This logic case should never happen. Ensure this is the
913 * case by forcing an abort.... Remove in production.
915 char *msg = NULL;
917 if (asprintf(&msg, "logic flaw in cases: "
918 "l_curr: start = %ju, "
919 "size = %ju : lock: "
920 "start = %ju, size = %ju",
921 (uintmax_t)l_curr->start,
922 (uintmax_t)l_curr->size,
923 (uintmax_t)lock->start,
924 (uintmax_t)lock->size ) != -1) {
925 smb_panic(msg);
926 } else {
927 smb_panic("posix_lock_list");
930 } /* end for ( l_curr = lhead; l_curr;) */
931 } /* end for (i=0; i<num_locks && ul_head; i++) */
933 return lhead;
936 /****************************************************************************
937 POSIX function to acquire a lock. Returns True if the
938 lock could be granted, False if not.
939 ****************************************************************************/
941 bool set_posix_lock_windows_flavour(files_struct *fsp,
942 uint64_t u_offset,
943 uint64_t u_count,
944 enum brl_type lock_type,
945 const struct lock_context *lock_ctx,
946 const struct lock_struct *plocks,
947 int num_locks,
948 int *errno_ret)
950 off_t offset;
951 off_t count;
952 int posix_lock_type = map_posix_lock_type(fsp,lock_type);
953 bool ret = True;
954 size_t lock_count;
955 TALLOC_CTX *l_ctx = NULL;
956 struct lock_list *llist = NULL;
957 struct lock_list *ll = NULL;
959 DEBUG(5, ("set_posix_lock_windows_flavour: File %s, offset = %ju, "
960 "count = %ju, type = %s\n", fsp_str_dbg(fsp),
961 (uintmax_t)u_offset, (uintmax_t)u_count,
962 posix_lock_type_name(lock_type)));
965 * If the requested lock won't fit in the POSIX range, we will
966 * pretend it was successful.
969 if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
970 increment_lock_ref_count(fsp);
971 return True;
975 * Windows is very strange. It allows read locks to be overlayed
976 * (even over a write lock), but leaves the write lock in force until the first
977 * unlock. It also reference counts the locks. This means the following sequence :
979 * process1 process2
980 * ------------------------------------------------------------------------
981 * WRITE LOCK : start = 2, len = 10
982 * READ LOCK: start =0, len = 10 - FAIL
983 * READ LOCK : start = 0, len = 14
984 * READ LOCK: start =0, len = 10 - FAIL
985 * UNLOCK : start = 2, len = 10
986 * READ LOCK: start =0, len = 10 - OK
988 * Under POSIX, the same sequence in steps 1 and 2 would not be reference counted, but
989 * would leave a single read lock over the 0-14 region.
992 if ((l_ctx = talloc_init("set_posix_lock")) == NULL) {
993 DEBUG(0,("set_posix_lock_windows_flavour: unable to init talloc context.\n"));
994 return False;
997 if ((ll = talloc(l_ctx, struct lock_list)) == NULL) {
998 DEBUG(0,("set_posix_lock_windows_flavour: unable to talloc unlock list.\n"));
999 talloc_destroy(l_ctx);
1000 return False;
1004 * Create the initial list entry containing the
1005 * lock we want to add.
1008 ZERO_STRUCTP(ll);
1009 ll->start = offset;
1010 ll->size = count;
1012 DLIST_ADD(llist, ll);
1015 * The following call calculates if there are any
1016 * overlapping locks held by this process on
1017 * fd's open on the same file and splits this list
1018 * into a list of lock ranges that do not overlap with existing
1019 * POSIX locks.
1022 llist = posix_lock_list(l_ctx,
1023 llist,
1024 lock_ctx, /* Lock context llist belongs to. */
1025 plocks,
1026 num_locks);
1029 * Add the POSIX locks on the list of ranges returned.
1030 * As the lock is supposed to be added atomically, we need to
1031 * back out all the locks if any one of these calls fail.
1034 for (lock_count = 0, ll = llist; ll; ll = ll->next, lock_count++) {
1035 offset = ll->start;
1036 count = ll->size;
1038 DEBUG(5, ("set_posix_lock_windows_flavour: Real lock: "
1039 "Type = %s: offset = %ju, count = %ju\n",
1040 posix_lock_type_name(posix_lock_type),
1041 (uintmax_t)offset, (uintmax_t)count ));
1043 if (!posix_fcntl_lock(fsp,F_SETLK,offset,count,posix_lock_type)) {
1044 *errno_ret = errno;
1045 DEBUG(5, ("set_posix_lock_windows_flavour: Lock "
1046 "fail !: Type = %s: offset = %ju, "
1047 "count = %ju. Errno = %s\n",
1048 posix_lock_type_name(posix_lock_type),
1049 (uintmax_t)offset, (uintmax_t)count,
1050 strerror(errno) ));
1051 ret = False;
1052 break;
1056 if (!ret) {
1059 * Back out all the POSIX locks we have on fail.
1062 for (ll = llist; lock_count; ll = ll->next, lock_count--) {
1063 offset = ll->start;
1064 count = ll->size;
1066 DEBUG(5, ("set_posix_lock_windows_flavour: Backing "
1067 "out locks: Type = %s: offset = %ju, "
1068 "count = %ju\n",
1069 posix_lock_type_name(posix_lock_type),
1070 (uintmax_t)offset, (uintmax_t)count ));
1072 posix_fcntl_lock(fsp,F_SETLK,offset,count,F_UNLCK);
1074 } else {
1075 /* Remember the number of locks we have on this dev/ino pair. */
1076 increment_lock_ref_count(fsp);
1079 talloc_destroy(l_ctx);
1080 return ret;
1083 /****************************************************************************
1084 POSIX function to release a lock. Returns True if the
1085 lock could be released, False if not.
1086 ****************************************************************************/
1088 bool release_posix_lock_windows_flavour(files_struct *fsp,
1089 uint64_t u_offset,
1090 uint64_t u_count,
1091 enum brl_type deleted_lock_type,
1092 const struct lock_context *lock_ctx,
1093 const struct lock_struct *plocks,
1094 int num_locks)
1096 off_t offset;
1097 off_t count;
1098 bool ret = True;
1099 TALLOC_CTX *ul_ctx = NULL;
1100 struct lock_list *ulist = NULL;
1101 struct lock_list *ul = NULL;
1103 DEBUG(5, ("release_posix_lock_windows_flavour: File %s, offset = %ju, "
1104 "count = %ju\n", fsp_str_dbg(fsp),
1105 (uintmax_t)u_offset, (uintmax_t)u_count));
1107 /* Remember the number of locks we have on this dev/ino pair. */
1108 decrement_lock_ref_count(fsp);
1111 * If the requested lock won't fit in the POSIX range, we will
1112 * pretend it was successful.
1115 if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
1116 return True;
1119 if ((ul_ctx = talloc_init("release_posix_lock")) == NULL) {
1120 DEBUG(0,("release_posix_lock_windows_flavour: unable to init talloc context.\n"));
1121 return False;
1124 if ((ul = talloc(ul_ctx, struct lock_list)) == NULL) {
1125 DEBUG(0,("release_posix_lock_windows_flavour: unable to talloc unlock list.\n"));
1126 talloc_destroy(ul_ctx);
1127 return False;
1131 * Create the initial list entry containing the
1132 * lock we want to remove.
1135 ZERO_STRUCTP(ul);
1136 ul->start = offset;
1137 ul->size = count;
1139 DLIST_ADD(ulist, ul);
1142 * The following call calculates if there are any
1143 * overlapping locks held by this process on
1144 * fd's open on the same file and creates a
1145 * list of unlock ranges that will allow
1146 * POSIX lock ranges to remain on the file whilst the
1147 * unlocks are performed.
1150 ulist = posix_lock_list(ul_ctx,
1151 ulist,
1152 lock_ctx, /* Lock context ulist belongs to. */
1153 plocks,
1154 num_locks);
1157 * If there were any overlapped entries (list is > 1 or size or start have changed),
1158 * and the lock_type we just deleted from
1159 * the upper layer tdb was a write lock, then before doing the unlock we need to downgrade
1160 * the POSIX lock to a read lock. This allows any overlapping read locks
1161 * to be atomically maintained.
1164 if (deleted_lock_type == WRITE_LOCK &&
1165 (!ulist || ulist->next != NULL || ulist->start != offset || ulist->size != count)) {
1167 DEBUG(5, ("release_posix_lock_windows_flavour: downgrading "
1168 "lock to READ: offset = %ju, count = %ju\n",
1169 (uintmax_t)offset, (uintmax_t)count ));
1171 if (!posix_fcntl_lock(fsp,F_SETLK,offset,count,F_RDLCK)) {
1172 DEBUG(0,("release_posix_lock_windows_flavour: downgrade of lock failed with error %s !\n", strerror(errno) ));
1173 talloc_destroy(ul_ctx);
1174 return False;
1179 * Release the POSIX locks on the list of ranges returned.
1182 for(; ulist; ulist = ulist->next) {
1183 offset = ulist->start;
1184 count = ulist->size;
1186 DEBUG(5, ("release_posix_lock_windows_flavour: Real unlock: "
1187 "offset = %ju, count = %ju\n",
1188 (uintmax_t)offset, (uintmax_t)count ));
1190 if (!posix_fcntl_lock(fsp,F_SETLK,offset,count,F_UNLCK)) {
1191 ret = False;
1195 talloc_destroy(ul_ctx);
1196 return ret;
1199 /****************************************************************************
1200 Next - the functions that deal with mapping CIFS POSIX locks onto
1201 the underlying system POSIX locks.
1202 ****************************************************************************/
1204 /****************************************************************************
1205 We only increment the lock ref count when we see a POSIX lock on a context
1206 that doesn't already have them.
1207 ****************************************************************************/
1209 static void increment_posix_lock_count(const files_struct *fsp,
1210 uint64_t smblctx)
1212 NTSTATUS status;
1213 TDB_DATA ctx_key;
1214 TDB_DATA val = { 0 };
1216 ctx_key.dptr = (uint8_t *)&smblctx;
1217 ctx_key.dsize = sizeof(smblctx);
1220 * Don't increment if we already have any POSIX flavor
1221 * locks on this context.
1223 if (dbwrap_exists(posix_pending_close_db, ctx_key)) {
1224 return;
1227 /* Remember that we have POSIX flavor locks on this context. */
1228 status = dbwrap_store(posix_pending_close_db, ctx_key, val, 0);
1229 SMB_ASSERT(NT_STATUS_IS_OK(status));
1231 increment_lock_ref_count(fsp);
1233 DEBUG(10,("posix_locks set for file %s\n",
1234 fsp_str_dbg(fsp)));
1237 static void decrement_posix_lock_count(const files_struct *fsp, uint64_t smblctx)
1239 NTSTATUS status;
1240 TDB_DATA ctx_key;
1242 ctx_key.dptr = (uint8_t *)&smblctx;
1243 ctx_key.dsize = sizeof(smblctx);
1245 status = dbwrap_delete(posix_pending_close_db, ctx_key);
1246 SMB_ASSERT(NT_STATUS_IS_OK(status));
1248 decrement_lock_ref_count(fsp);
1250 DEBUG(10,("posix_locks deleted for file %s\n",
1251 fsp_str_dbg(fsp)));
1254 /****************************************************************************
1255 Return true if any locks exist on the given lock context.
1256 ****************************************************************************/
1258 static bool locks_exist_on_context(const struct lock_struct *plocks,
1259 int num_locks,
1260 const struct lock_context *lock_ctx)
1262 int i;
1264 for (i=0; i < num_locks; i++) {
1265 const struct lock_struct *lock = &plocks[i];
1267 /* Ignore all but read/write locks. */
1268 if (lock->lock_type != READ_LOCK && lock->lock_type != WRITE_LOCK) {
1269 continue;
1272 /* Ignore locks not owned by this process. */
1273 if (!serverid_equal(&lock->context.pid, &lock_ctx->pid)) {
1274 continue;
1277 if (lock_ctx->smblctx == lock->context.smblctx) {
1278 return true;
1281 return false;
1284 /****************************************************************************
1285 POSIX function to acquire a lock. Returns True if the
1286 lock could be granted, False if not.
1287 As POSIX locks don't stack or conflict (they just overwrite)
1288 we can map the requested lock directly onto a system one. We
1289 know it doesn't conflict with locks on other contexts as the
1290 upper layer would have refused it.
1291 ****************************************************************************/
1293 bool set_posix_lock_posix_flavour(files_struct *fsp,
1294 uint64_t u_offset,
1295 uint64_t u_count,
1296 enum brl_type lock_type,
1297 const struct lock_context *lock_ctx,
1298 int *errno_ret)
1300 off_t offset;
1301 off_t count;
1302 int posix_lock_type = map_posix_lock_type(fsp,lock_type);
1304 DEBUG(5,("set_posix_lock_posix_flavour: File %s, offset = %ju, count "
1305 "= %ju, type = %s\n", fsp_str_dbg(fsp),
1306 (uintmax_t)u_offset, (uintmax_t)u_count,
1307 posix_lock_type_name(lock_type)));
1310 * If the requested lock won't fit in the POSIX range, we will
1311 * pretend it was successful.
1314 if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
1315 increment_posix_lock_count(fsp, lock_ctx->smblctx);
1316 return True;
1319 if (!posix_fcntl_lock(fsp,F_SETLK,offset,count,posix_lock_type)) {
1320 *errno_ret = errno;
1321 DEBUG(5,("set_posix_lock_posix_flavour: Lock fail !: Type = %s: offset = %ju, count = %ju. Errno = %s\n",
1322 posix_lock_type_name(posix_lock_type), (intmax_t)offset, (intmax_t)count, strerror(errno) ));
1323 return False;
1325 increment_posix_lock_count(fsp, lock_ctx->smblctx);
1326 return True;
1329 /****************************************************************************
1330 POSIX function to release a lock. Returns True if the
1331 lock could be released, False if not.
1332 We are given a complete lock state from the upper layer which is what the lock
1333 state should be after the unlock has already been done, so what
1334 we do is punch out holes in the unlock range where locks owned by this process
1335 have a different lock context.
1336 ****************************************************************************/
1338 bool release_posix_lock_posix_flavour(files_struct *fsp,
1339 uint64_t u_offset,
1340 uint64_t u_count,
1341 const struct lock_context *lock_ctx,
1342 const struct lock_struct *plocks,
1343 int num_locks)
1345 bool ret = True;
1346 off_t offset;
1347 off_t count;
1348 TALLOC_CTX *ul_ctx = NULL;
1349 struct lock_list *ulist = NULL;
1350 struct lock_list *ul = NULL;
1352 DEBUG(5, ("release_posix_lock_posix_flavour: File %s, offset = %ju, "
1353 "count = %ju\n", fsp_str_dbg(fsp),
1354 (uintmax_t)u_offset, (uintmax_t)u_count));
1357 * If the requested lock won't fit in the POSIX range, we will
1358 * pretend it was successful.
1361 if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
1362 if (!locks_exist_on_context(plocks, num_locks, lock_ctx)) {
1363 decrement_posix_lock_count(fsp, lock_ctx->smblctx);
1365 return True;
1368 if ((ul_ctx = talloc_init("release_posix_lock")) == NULL) {
1369 DEBUG(0,("release_posix_lock_windows_flavour: unable to init talloc context.\n"));
1370 return False;
1373 if ((ul = talloc(ul_ctx, struct lock_list)) == NULL) {
1374 DEBUG(0,("release_posix_lock_windows_flavour: unable to talloc unlock list.\n"));
1375 talloc_destroy(ul_ctx);
1376 return False;
1380 * Create the initial list entry containing the
1381 * lock we want to remove.
1384 ZERO_STRUCTP(ul);
1385 ul->start = offset;
1386 ul->size = count;
1388 DLIST_ADD(ulist, ul);
1391 * Walk the given array creating a linked list
1392 * of unlock requests.
1395 ulist = posix_lock_list(ul_ctx,
1396 ulist,
1397 lock_ctx, /* Lock context ulist belongs to. */
1398 plocks,
1399 num_locks);
1402 * Release the POSIX locks on the list of ranges returned.
1405 for(; ulist; ulist = ulist->next) {
1406 offset = ulist->start;
1407 count = ulist->size;
1409 DEBUG(5, ("release_posix_lock_posix_flavour: Real unlock: "
1410 "offset = %ju, count = %ju\n",
1411 (uintmax_t)offset, (uintmax_t)count ));
1413 if (!posix_fcntl_lock(fsp,F_SETLK,offset,count,F_UNLCK)) {
1414 ret = False;
1418 if (!locks_exist_on_context(plocks, num_locks, lock_ctx)) {
1419 decrement_posix_lock_count(fsp, lock_ctx->smblctx);
1421 talloc_destroy(ul_ctx);
1422 return ret;