s3:locking: Fix integer overflow check in posix_lock_in_range()
[Samba.git] / source3 / locking / posix.c
blob0b627aaa3e58ed2190a143e3bd028653ad97115c
1 /*
2 Unix SMB/CIFS implementation.
3 Locking functions
4 Copyright (C) Jeremy Allison 1992-2006
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <http://www.gnu.org/licenses/>.
19 Revision History:
21 POSIX locking support. Jeremy Allison (jeremy@valinux.com), Apr. 2000.
24 #include "includes.h"
25 #include "system/filesys.h"
26 #include "lib/util/server_id.h"
27 #include "locking/proto.h"
28 #include "dbwrap/dbwrap.h"
29 #include "dbwrap/dbwrap_rbt.h"
30 #include "util_tdb.h"
32 #undef DBGC_CLASS
33 #define DBGC_CLASS DBGC_LOCKING
/*
 * The pending close database handle.
 *
 * Opened by posix_locking_init() via db_open_rbt() and released by
 * posix_locking_end(). In this file it is used for two kinds of
 * records: the per-file lock reference counters (keyed by
 * locking_ref_count_key_fsp()) and the per-file arrays of fds whose
 * close has been deferred (keyed by fd_array_key_fsp()).
 */

static struct db_context *posix_pending_close_db;
41 /****************************************************************************
42 First - the functions that deal with the underlying system locks - these
43 functions are used no matter if we're mapping CIFS Windows locks or CIFS
44 POSIX locks onto POSIX.
45 ****************************************************************************/
47 /****************************************************************************
48 Utility function to map a lock type correctly depending on the open
49 mode of a file.
50 ****************************************************************************/
52 static int map_posix_lock_type( files_struct *fsp, enum brl_type lock_type)
54 if((lock_type == WRITE_LOCK) && !fsp->can_write) {
56 * Many UNIX's cannot get a write lock on a file opened read-only.
57 * Win32 locking semantics allow this.
58 * Do the best we can and attempt a read-only lock.
60 DEBUG(10,("map_posix_lock_type: Downgrading write lock to read due to read-only file.\n"));
61 return F_RDLCK;
65 * This return should be the most normal, as we attempt
66 * to always open files read/write.
69 return (lock_type == READ_LOCK) ? F_RDLCK : F_WRLCK;
72 /****************************************************************************
73 Debugging aid :-).
74 ****************************************************************************/
/*
 * Debugging aid: printable name for an fcntl() lock type.
 *
 * Returns "READ" when lock_type equals F_RDLCK and "WRITE" for every
 * other value (including F_UNLCK).
 */
static const char *posix_lock_type_name(int lock_type)
{
	if (lock_type == F_RDLCK) {
		return "READ";
	}
	return "WRITE";
}
81 /****************************************************************************
82 Check to see if the given unsigned lock range is within the possible POSIX
83 range. Modifies the given args to be in range if possible, just returns
84 False if not.
85 ****************************************************************************/
87 static bool posix_lock_in_range(off_t *offset_out, off_t *count_out,
88 uint64_t u_offset, uint64_t u_count)
90 off_t offset = (off_t)u_offset;
91 off_t count = (off_t)u_count;
94 * For the type of system we are, attempt to
95 * find the maximum positive lock offset as an off_t.
98 #if defined(MAX_POSITIVE_LOCK_OFFSET) /* Some systems have arbitrary limits. */
100 off_t max_positive_lock_offset = (MAX_POSITIVE_LOCK_OFFSET);
101 #else
103 * In this case off_t is 64 bits,
104 * and the underlying system can handle 64 bit signed locks.
107 off_t mask2 = ((off_t)0x4) << (SMB_OFF_T_BITS-4);
108 off_t mask = (mask2<<1);
109 off_t max_positive_lock_offset = ~mask;
111 #endif
113 * POSIX locks of length zero mean lock to end-of-file.
114 * Win32 locks of length zero are point probes. Ignore
115 * any Win32 locks of length zero. JRA.
118 if (count == 0) {
119 DEBUG(10,("posix_lock_in_range: count = 0, ignoring.\n"));
120 return False;
124 * If the given offset was > max_positive_lock_offset then we cannot map this at all
125 * ignore this lock.
128 if (u_offset & ~((uint64_t)max_positive_lock_offset)) {
129 DEBUG(10, ("posix_lock_in_range: (offset = %ju) offset > %ju "
130 "and we cannot handle this. Ignoring lock.\n",
131 (uintmax_t)u_offset,
132 (uintmax_t)max_positive_lock_offset));
133 return False;
137 * We must truncate the count to less than max_positive_lock_offset.
140 if (u_count & ~((uint64_t)max_positive_lock_offset)) {
141 count = max_positive_lock_offset;
145 * Truncate count to end at max lock offset.
148 if (offset > INT64_MAX - count ||
149 offset + count > max_positive_lock_offset) {
150 count = max_positive_lock_offset - offset;
154 * If we ate all the count, ignore this lock.
157 if (count == 0) {
158 DEBUG(10, ("posix_lock_in_range: Count = 0. Ignoring lock "
159 "u_offset = %ju, u_count = %ju\n",
160 (uintmax_t)u_offset,
161 (uintmax_t)u_count));
162 return False;
166 * The mapping was successful.
169 DEBUG(10, ("posix_lock_in_range: offset_out = %ju, "
170 "count_out = %ju\n",
171 (uintmax_t)offset, (uintmax_t)count));
173 *offset_out = offset;
174 *count_out = count;
176 return True;
179 bool smb_vfs_call_lock(struct vfs_handle_struct *handle,
180 struct files_struct *fsp, int op, off_t offset,
181 off_t count, int type)
183 VFS_FIND(lock);
184 return handle->fns->lock_fn(handle, fsp, op, offset, count, type);
187 /****************************************************************************
188 Actual function that does POSIX locks. Copes with 64 -> 32 bit cruft and
189 broken NFS implementations.
190 ****************************************************************************/
192 static bool posix_fcntl_lock(files_struct *fsp, int op, off_t offset, off_t count, int type)
194 bool ret;
196 DEBUG(8,("posix_fcntl_lock %d %d %jd %jd %d\n",
197 fsp->fh->fd,op,(intmax_t)offset,(intmax_t)count,type));
199 ret = SMB_VFS_LOCK(fsp, op, offset, count, type);
201 if (!ret && ((errno == EFBIG) || (errno == ENOLCK) || (errno == EINVAL))) {
203 if ((errno == EINVAL) &&
204 (op != F_GETLK &&
205 op != F_SETLK &&
206 op != F_SETLKW)) {
207 DEBUG(0,("WARNING: OFD locks in use and no kernel "
208 "support. Try setting "
209 "'smbd:force process locks = true' "
210 "in smb.conf\n"));
211 } else {
212 DEBUG(0, ("WARNING: lock request at offset "
213 "%ju, length %ju returned\n",
214 (uintmax_t)offset, (uintmax_t)count));
215 DEBUGADD(0, ("an %s error. This can happen when using 64 bit "
216 "lock offsets\n", strerror(errno)));
217 DEBUGADD(0, ("on 32 bit NFS mounted file systems.\n"));
221 * If the offset is > 0x7FFFFFFF then this will cause problems on
222 * 32 bit NFS mounted filesystems. Just ignore it.
225 if (offset & ~((off_t)0x7fffffff)) {
226 DEBUG(0,("Offset greater than 31 bits. Returning success.\n"));
227 return True;
230 if (count & ~((off_t)0x7fffffff)) {
231 /* 32 bit NFS file system, retry with smaller offset */
232 DEBUG(0,("Count greater than 31 bits - retrying with 31 bit truncated length.\n"));
233 errno = 0;
234 count &= 0x7fffffff;
235 ret = SMB_VFS_LOCK(fsp, op, offset, count, type);
239 DEBUG(8,("posix_fcntl_lock: Lock call %s\n", ret ? "successful" : "failed"));
240 return ret;
243 bool smb_vfs_call_getlock(struct vfs_handle_struct *handle,
244 struct files_struct *fsp, off_t *poffset,
245 off_t *pcount, int *ptype, pid_t *ppid)
247 VFS_FIND(getlock);
248 return handle->fns->getlock_fn(handle, fsp, poffset, pcount, ptype,
249 ppid);
252 /****************************************************************************
253 Actual function that gets POSIX locks. Copes with 64 -> 32 bit cruft and
254 broken NFS implementations.
255 ****************************************************************************/
257 static bool posix_fcntl_getlock(files_struct *fsp, off_t *poffset, off_t *pcount, int *ptype)
259 pid_t pid;
260 bool ret;
262 DEBUG(8, ("posix_fcntl_getlock %d %ju %ju %d\n",
263 fsp->fh->fd, (uintmax_t)*poffset, (uintmax_t)*pcount,
264 *ptype));
266 ret = SMB_VFS_GETLOCK(fsp, poffset, pcount, ptype, &pid);
268 if (!ret && ((errno == EFBIG) || (errno == ENOLCK) || (errno == EINVAL))) {
270 DEBUG(0, ("posix_fcntl_getlock: WARNING: lock request at "
271 "offset %ju, length %ju returned\n",
272 (uintmax_t)*poffset, (uintmax_t)*pcount));
273 DEBUGADD(0, ("an %s error. This can happen when using 64 bit "
274 "lock offsets\n", strerror(errno)));
275 DEBUGADD(0, ("on 32 bit NFS mounted file systems.\n"));
278 * If the offset is > 0x7FFFFFFF then this will cause problems on
279 * 32 bit NFS mounted filesystems. Just ignore it.
282 if (*poffset & ~((off_t)0x7fffffff)) {
283 DEBUG(0,("Offset greater than 31 bits. Returning success.\n"));
284 return True;
287 if (*pcount & ~((off_t)0x7fffffff)) {
288 /* 32 bit NFS file system, retry with smaller offset */
289 DEBUG(0,("Count greater than 31 bits - retrying with 31 bit truncated length.\n"));
290 errno = 0;
291 *pcount &= 0x7fffffff;
292 ret = SMB_VFS_GETLOCK(fsp,poffset,pcount,ptype,&pid);
296 DEBUG(8,("posix_fcntl_getlock: Lock query call %s\n", ret ? "successful" : "failed"));
297 return ret;
300 /****************************************************************************
301 POSIX function to see if a file region is locked. Returns True if the
302 region is locked, False otherwise.
303 ****************************************************************************/
305 bool is_posix_locked(files_struct *fsp,
306 uint64_t *pu_offset,
307 uint64_t *pu_count,
308 enum brl_type *plock_type,
309 enum brl_flavour lock_flav)
311 off_t offset;
312 off_t count;
313 int posix_lock_type = map_posix_lock_type(fsp,*plock_type);
315 DEBUG(10, ("is_posix_locked: File %s, offset = %ju, count = %ju, "
316 "type = %s\n", fsp_str_dbg(fsp), (uintmax_t)*pu_offset,
317 (uintmax_t)*pu_count, posix_lock_type_name(*plock_type)));
320 * If the requested lock won't fit in the POSIX range, we will
321 * never set it, so presume it is not locked.
324 if(!posix_lock_in_range(&offset, &count, *pu_offset, *pu_count)) {
325 return False;
328 if (!posix_fcntl_getlock(fsp,&offset,&count,&posix_lock_type)) {
329 return False;
332 if (posix_lock_type == F_UNLCK) {
333 return False;
336 if (lock_flav == POSIX_LOCK) {
337 /* Only POSIX lock queries need to know the details. */
338 *pu_offset = (uint64_t)offset;
339 *pu_count = (uint64_t)count;
340 *plock_type = (posix_lock_type == F_RDLCK) ? READ_LOCK : WRITE_LOCK;
342 return True;
345 /****************************************************************************
346 Next - the functions that deal with in memory database storing representations
347 of either Windows CIFS locks or POSIX CIFS locks.
348 ****************************************************************************/
350 /* The key used in the in-memory POSIX databases. */
/*
 * Key of a lock reference count record in posix_pending_close_db.
 * The structure is zeroed and then filled by locking_ref_count_key_fsp()
 * and handed to the database as raw bytes.
 */
struct lock_ref_count_key {
	struct file_id id;	/* copied from fsp->file_id */
	char r;			/* always set to 'r'; presumably distinguishes
				 * this key from the bare file_id key used for
				 * the fd arrays - confirm */
};
357 /*******************************************************************
358 Form a static locking key for a dev/inode pair for the lock ref count
359 ******************************************************************/
361 static TDB_DATA locking_ref_count_key_fsp(const files_struct *fsp,
362 struct lock_ref_count_key *tmp)
364 ZERO_STRUCTP(tmp);
365 tmp->id = fsp->file_id;
366 tmp->r = 'r';
367 return make_tdb_data((uint8_t *)tmp, sizeof(*tmp));
370 /*******************************************************************
371 Convenience function to get an fd_array key from an fsp.
372 ******************************************************************/
374 static TDB_DATA fd_array_key_fsp(const files_struct *fsp)
376 return make_tdb_data((const uint8_t *)&fsp->file_id, sizeof(fsp->file_id));
379 /*******************************************************************
380 Create the in-memory POSIX lock databases.
381 ********************************************************************/
383 bool posix_locking_init(bool read_only)
385 if (posix_pending_close_db != NULL) {
386 return true;
389 posix_pending_close_db = db_open_rbt(NULL);
391 if (posix_pending_close_db == NULL) {
392 DEBUG(0,("Failed to open POSIX pending close database.\n"));
393 return false;
396 return true;
399 /*******************************************************************
400 Delete the in-memory POSIX lock databases.
401 ********************************************************************/
/*
 * Delete the in-memory POSIX pending-close database.
 *
 * Releases posix_pending_close_db with TALLOC_FREE. The fds recorded in
 * the database are not closed here. Always returns true.
 */
bool posix_locking_end(void)
{
	/*
	 * Shouldn't we close all fd's here?
	 */
	TALLOC_FREE(posix_pending_close_db);
	return true;
}
412 /****************************************************************************
413 Next - the functions that deal with storing fd's that have outstanding
414 POSIX locks when closed.
415 ****************************************************************************/
417 /****************************************************************************
418 The records in posix_pending_close_db are composed of an array of
419 ints keyed by dev/ino pair. Those ints are the fd's that were open on
420 this dev/ino pair that should have been closed, but can't as the lock
421 ref count is non zero.
422 ****************************************************************************/
424 /****************************************************************************
425 Keep a reference count of the number of locks open on this dev/ino
426 pair. Creates entry if it doesn't exist.
427 ****************************************************************************/
429 static void increment_lock_ref_count(const files_struct *fsp)
431 struct lock_ref_count_key tmp;
432 int32_t lock_ref_count = 0;
433 NTSTATUS status;
435 status = dbwrap_change_int32_atomic(
436 posix_pending_close_db, locking_ref_count_key_fsp(fsp, &tmp),
437 &lock_ref_count, 1);
439 SMB_ASSERT(NT_STATUS_IS_OK(status));
440 SMB_ASSERT(lock_ref_count < INT32_MAX);
442 DEBUG(10,("lock_ref_count for file %s = %d\n",
443 fsp_str_dbg(fsp), (int)lock_ref_count));
446 static void decrement_lock_ref_count(const files_struct *fsp)
448 struct lock_ref_count_key tmp;
449 int32_t lock_ref_count = 0;
450 NTSTATUS status;
452 status = dbwrap_change_int32_atomic(
453 posix_pending_close_db, locking_ref_count_key_fsp(fsp, &tmp),
454 &lock_ref_count, -1);
456 SMB_ASSERT(NT_STATUS_IS_OK(status));
457 SMB_ASSERT(lock_ref_count >= 0);
459 DEBUG(10,("lock_ref_count for file %s = %d\n",
460 fsp_str_dbg(fsp), (int)lock_ref_count));
463 /****************************************************************************
464 Fetch the lock ref count.
465 ****************************************************************************/
467 static int32_t get_lock_ref_count(const files_struct *fsp)
469 struct lock_ref_count_key tmp;
470 NTSTATUS status;
471 int32_t lock_ref_count = 0;
473 status = dbwrap_fetch_int32(
474 posix_pending_close_db, locking_ref_count_key_fsp(fsp, &tmp),
475 &lock_ref_count);
477 if (!NT_STATUS_IS_OK(status) &&
478 !NT_STATUS_EQUAL(status, NT_STATUS_NOT_FOUND)) {
479 DEBUG(0, ("Error fetching "
480 "lock ref count for file %s: %s\n",
481 fsp_str_dbg(fsp), nt_errstr(status)));
483 return lock_ref_count;
486 /****************************************************************************
487 Delete a lock_ref_count entry.
488 ****************************************************************************/
490 static void delete_lock_ref_count(const files_struct *fsp)
492 struct lock_ref_count_key tmp;
494 /* Not a bug if it doesn't exist - no locks were ever granted. */
496 dbwrap_delete(posix_pending_close_db,
497 locking_ref_count_key_fsp(fsp, &tmp));
499 DEBUG(10,("delete_lock_ref_count for file %s\n",
500 fsp_str_dbg(fsp)));
503 /****************************************************************************
504 Add an fd to the pending close tdb.
505 ****************************************************************************/
507 static void add_fd_to_close_entry(const files_struct *fsp)
509 struct db_record *rec;
510 int *fds;
511 size_t num_fds;
512 NTSTATUS status;
513 TDB_DATA value;
515 rec = dbwrap_fetch_locked(
516 posix_pending_close_db, talloc_tos(),
517 fd_array_key_fsp(fsp));
519 SMB_ASSERT(rec != NULL);
521 value = dbwrap_record_get_value(rec);
522 SMB_ASSERT((value.dsize % sizeof(int)) == 0);
524 num_fds = value.dsize / sizeof(int);
525 fds = talloc_array(rec, int, num_fds+1);
527 SMB_ASSERT(fds != NULL);
529 memcpy(fds, value.dptr, value.dsize);
530 fds[num_fds] = fsp->fh->fd;
532 status = dbwrap_record_store(
533 rec, make_tdb_data((uint8_t *)fds, talloc_get_size(fds)), 0);
535 SMB_ASSERT(NT_STATUS_IS_OK(status));
537 TALLOC_FREE(rec);
539 DEBUG(10,("add_fd_to_close_entry: added fd %d file %s\n",
540 fsp->fh->fd, fsp_str_dbg(fsp)));
543 /****************************************************************************
544 Remove all fd entries for a specific dev/inode pair from the tdb.
545 ****************************************************************************/
547 static void delete_close_entries(const files_struct *fsp)
549 struct db_record *rec;
551 rec = dbwrap_fetch_locked(
552 posix_pending_close_db, talloc_tos(),
553 fd_array_key_fsp(fsp));
555 SMB_ASSERT(rec != NULL);
556 dbwrap_record_delete(rec);
557 TALLOC_FREE(rec);
560 /****************************************************************************
561 Get the array of POSIX pending close records for an open fsp. Returns number
562 of entries.
563 ****************************************************************************/
565 static size_t get_posix_pending_close_entries(TALLOC_CTX *mem_ctx,
566 const files_struct *fsp,
567 int **entries)
569 TDB_DATA dbuf;
570 NTSTATUS status;
572 status = dbwrap_fetch(
573 posix_pending_close_db, mem_ctx, fd_array_key_fsp(fsp),
574 &dbuf);
576 if (NT_STATUS_EQUAL(status, NT_STATUS_NOT_FOUND)) {
577 *entries = NULL;
578 return 0;
581 SMB_ASSERT(NT_STATUS_IS_OK(status));
583 if (dbuf.dsize == 0) {
584 *entries = NULL;
585 return 0;
588 *entries = (int *)dbuf.dptr;
589 return (size_t)(dbuf.dsize / sizeof(int));
592 /****************************************************************************
593 Deal with pending closes needed by POSIX locking support.
594 Note that posix_locking_close_file() is expected to have been called
595 to delete all locks on this fsp before this function is called.
596 ****************************************************************************/
598 int fd_close_posix(const struct files_struct *fsp)
600 int saved_errno = 0;
601 int ret;
602 int *fd_array = NULL;
603 size_t count, i;
605 if (!lp_locking(fsp->conn->params) ||
606 !lp_posix_locking(fsp->conn->params) ||
607 fsp->use_ofd_locks)
610 * No locking or POSIX to worry about or we are using POSIX
611 * open file description lock semantics which only removes
612 * locks on the file descriptor we're closing. Just close.
614 return close(fsp->fh->fd);
617 if (get_lock_ref_count(fsp)) {
620 * There are outstanding locks on this dev/inode pair on
621 * other fds. Add our fd to the pending close tdb and set
622 * fsp->fh->fd to -1.
625 add_fd_to_close_entry(fsp);
626 return 0;
630 * No outstanding locks. Get the pending close fd's
631 * from the tdb and close them all.
634 count = get_posix_pending_close_entries(talloc_tos(), fsp, &fd_array);
636 if (count) {
637 DEBUG(10,("fd_close_posix: doing close on %u fd's.\n",
638 (unsigned int)count));
640 for(i = 0; i < count; i++) {
641 if (close(fd_array[i]) == -1) {
642 saved_errno = errno;
647 * Delete all fd's stored in the tdb
648 * for this dev/inode pair.
651 delete_close_entries(fsp);
654 TALLOC_FREE(fd_array);
656 /* Don't need a lock ref count on this dev/ino anymore. */
657 delete_lock_ref_count(fsp);
660 * Finally close the fd associated with this fsp.
663 ret = close(fsp->fh->fd);
665 if (ret == 0 && saved_errno != 0) {
666 errno = saved_errno;
667 ret = -1;
670 return ret;
673 /****************************************************************************
674 Next - the functions that deal with the mapping CIFS Windows locks onto
675 the underlying system POSIX locks.
676 ****************************************************************************/
679 * Structure used when splitting a lock range
680 * into a POSIX lock range. Doubly linked list.
/*
 * Structure used when splitting a lock range
 * into a POSIX lock range. Doubly linked list.
 */
struct lock_list {
	struct lock_list *next;	/* following range in the list, or NULL */
	struct lock_list *prev;	/* list back link, maintained by the DLIST macros */
	off_t start;		/* first byte of the range */
	off_t size;		/* length of the range */
};
690 /****************************************************************************
691 Create a list of lock ranges that don't overlap a given range. Used in calculating
692 POSIX locks and unlocks. This is a difficult function that requires ASCII art to
693 understand it :-).
694 ****************************************************************************/
696 static struct lock_list *posix_lock_list(TALLOC_CTX *ctx,
697 struct lock_list *lhead,
698 const struct lock_context *lock_ctx, /* Lock context lhead belongs to. */
699 const struct lock_struct *plocks,
700 int num_locks)
702 int i;
705 * Check the current lock list on this dev/inode pair.
706 * Quit if the list is deleted.
709 DEBUG(10, ("posix_lock_list: curr: start=%ju,size=%ju\n",
710 (uintmax_t)lhead->start, (uintmax_t)lhead->size ));
712 for (i=0; i<num_locks && lhead; i++) {
713 const struct lock_struct *lock = &plocks[i];
714 struct lock_list *l_curr;
716 /* Ignore all but read/write locks. */
717 if (lock->lock_type != READ_LOCK && lock->lock_type != WRITE_LOCK) {
718 continue;
721 /* Ignore locks not owned by this process. */
722 if (!serverid_equal(&lock->context.pid, &lock_ctx->pid)) {
723 continue;
727 * Walk the lock list, checking for overlaps. Note that
728 * the lock list can expand within this loop if the current
729 * range being examined needs to be split.
732 for (l_curr = lhead; l_curr;) {
734 DEBUG(10, ("posix_lock_list: lock: fnum=%ju: "
735 "start=%ju,size=%ju:type=%s",
736 (uintmax_t)lock->fnum,
737 (uintmax_t)lock->start,
738 (uintmax_t)lock->size,
739 posix_lock_type_name(lock->lock_type) ));
741 if ( (l_curr->start >= (lock->start + lock->size)) ||
742 (lock->start >= (l_curr->start + l_curr->size))) {
744 /* No overlap with existing lock - leave this range alone. */
745 /*********************************************
746 +---------+
747 | l_curr |
748 +---------+
749 +-------+
750 | lock |
751 +-------+
752 OR....
753 +---------+
754 | l_curr |
755 +---------+
756 **********************************************/
758 DEBUG(10,(" no overlap case.\n" ));
760 l_curr = l_curr->next;
762 } else if ( (l_curr->start >= lock->start) &&
763 (l_curr->start + l_curr->size <= lock->start + lock->size) ) {
766 * This range is completely overlapped by this existing lock range
767 * and thus should have no effect. Delete it from the list.
769 /*********************************************
770 +---------+
771 | l_curr |
772 +---------+
773 +---------------------------+
774 | lock |
775 +---------------------------+
776 **********************************************/
777 /* Save the next pointer */
778 struct lock_list *ul_next = l_curr->next;
780 DEBUG(10,(" delete case.\n" ));
782 DLIST_REMOVE(lhead, l_curr);
783 if(lhead == NULL) {
784 break; /* No more list... */
787 l_curr = ul_next;
789 } else if ( (l_curr->start >= lock->start) &&
790 (l_curr->start < lock->start + lock->size) &&
791 (l_curr->start + l_curr->size > lock->start + lock->size) ) {
794 * This range overlaps the existing lock range at the high end.
795 * Truncate by moving start to existing range end and reducing size.
797 /*********************************************
798 +---------------+
799 | l_curr |
800 +---------------+
801 +---------------+
802 | lock |
803 +---------------+
804 BECOMES....
805 +-------+
806 | l_curr|
807 +-------+
808 **********************************************/
810 l_curr->size = (l_curr->start + l_curr->size) - (lock->start + lock->size);
811 l_curr->start = lock->start + lock->size;
813 DEBUG(10, (" truncate high case: start=%ju,"
814 "size=%ju\n",
815 (uintmax_t)l_curr->start,
816 (uintmax_t)l_curr->size ));
818 l_curr = l_curr->next;
820 } else if ( (l_curr->start < lock->start) &&
821 (l_curr->start + l_curr->size > lock->start) &&
822 (l_curr->start + l_curr->size <= lock->start + lock->size) ) {
825 * This range overlaps the existing lock range at the low end.
826 * Truncate by reducing size.
828 /*********************************************
829 +---------------+
830 | l_curr |
831 +---------------+
832 +---------------+
833 | lock |
834 +---------------+
835 BECOMES....
836 +-------+
837 | l_curr|
838 +-------+
839 **********************************************/
841 l_curr->size = lock->start - l_curr->start;
843 DEBUG(10, (" truncate low case: start=%ju,"
844 "size=%ju\n",
845 (uintmax_t)l_curr->start,
846 (uintmax_t)l_curr->size ));
848 l_curr = l_curr->next;
850 } else if ( (l_curr->start < lock->start) &&
851 (l_curr->start + l_curr->size > lock->start + lock->size) ) {
853 * Worst case scenario. Range completely overlaps an existing
854 * lock range. Split the request into two, push the new (upper) request
855 * into the dlink list, and continue with the entry after l_new (as we
856 * know that l_new will not overlap with this lock).
858 /*********************************************
859 +---------------------------+
860 | l_curr |
861 +---------------------------+
862 +---------+
863 | lock |
864 +---------+
865 BECOMES.....
866 +-------+ +---------+
867 | l_curr| | l_new |
868 +-------+ +---------+
869 **********************************************/
870 struct lock_list *l_new = talloc(ctx, struct lock_list);
872 if(l_new == NULL) {
873 DEBUG(0,("posix_lock_list: talloc fail.\n"));
874 return NULL; /* The talloc_destroy takes care of cleanup. */
877 ZERO_STRUCTP(l_new);
878 l_new->start = lock->start + lock->size;
879 l_new->size = l_curr->start + l_curr->size - l_new->start;
881 /* Truncate the l_curr. */
882 l_curr->size = lock->start - l_curr->start;
884 DEBUG(10, (" split case: curr: start=%ju,"
885 "size=%ju new: start=%ju,"
886 "size=%ju\n",
887 (uintmax_t)l_curr->start,
888 (uintmax_t)l_curr->size,
889 (uintmax_t)l_new->start,
890 (uintmax_t)l_new->size ));
893 * Add into the dlink list after the l_curr point - NOT at lhead.
895 DLIST_ADD_AFTER(lhead, l_new, l_curr);
897 /* And move after the link we added. */
898 l_curr = l_new->next;
900 } else {
903 * This logic case should never happen. Ensure this is the
904 * case by forcing an abort.... Remove in production.
906 char *msg = NULL;
908 if (asprintf(&msg, "logic flaw in cases: "
909 "l_curr: start = %ju, "
910 "size = %ju : lock: "
911 "start = %ju, size = %ju",
912 (uintmax_t)l_curr->start,
913 (uintmax_t)l_curr->size,
914 (uintmax_t)lock->start,
915 (uintmax_t)lock->size ) != -1) {
916 smb_panic(msg);
917 } else {
918 smb_panic("posix_lock_list");
921 } /* end for ( l_curr = lhead; l_curr;) */
922 } /* end for (i=0; i<num_locks && ul_head; i++) */
924 return lhead;
927 /****************************************************************************
928 POSIX function to acquire a lock. Returns True if the
929 lock could be granted, False if not.
930 ****************************************************************************/
932 bool set_posix_lock_windows_flavour(files_struct *fsp,
933 uint64_t u_offset,
934 uint64_t u_count,
935 enum brl_type lock_type,
936 const struct lock_context *lock_ctx,
937 const struct lock_struct *plocks,
938 int num_locks,
939 int *errno_ret)
941 off_t offset;
942 off_t count;
943 int posix_lock_type = map_posix_lock_type(fsp,lock_type);
944 bool ret = True;
945 size_t lock_count;
946 TALLOC_CTX *l_ctx = NULL;
947 struct lock_list *llist = NULL;
948 struct lock_list *ll = NULL;
950 DEBUG(5, ("set_posix_lock_windows_flavour: File %s, offset = %ju, "
951 "count = %ju, type = %s\n", fsp_str_dbg(fsp),
952 (uintmax_t)u_offset, (uintmax_t)u_count,
953 posix_lock_type_name(lock_type)));
956 * If the requested lock won't fit in the POSIX range, we will
957 * pretend it was successful.
960 if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
961 increment_lock_ref_count(fsp);
962 return True;
966 * Windows is very strange. It allows read locks to be overlayed
967 * (even over a write lock), but leaves the write lock in force until the first
968 * unlock. It also reference counts the locks. This means the following sequence :
970 * process1 process2
971 * ------------------------------------------------------------------------
972 * WRITE LOCK : start = 2, len = 10
973 * READ LOCK: start =0, len = 10 - FAIL
974 * READ LOCK : start = 0, len = 14
975 * READ LOCK: start =0, len = 10 - FAIL
976 * UNLOCK : start = 2, len = 10
977 * READ LOCK: start =0, len = 10 - OK
979 * Under POSIX, the same sequence in steps 1 and 2 would not be reference counted, but
980 * would leave a single read lock over the 0-14 region.
983 if ((l_ctx = talloc_init("set_posix_lock")) == NULL) {
984 DEBUG(0,("set_posix_lock_windows_flavour: unable to init talloc context.\n"));
985 return False;
988 if ((ll = talloc(l_ctx, struct lock_list)) == NULL) {
989 DEBUG(0,("set_posix_lock_windows_flavour: unable to talloc unlock list.\n"));
990 talloc_destroy(l_ctx);
991 return False;
995 * Create the initial list entry containing the
996 * lock we want to add.
999 ZERO_STRUCTP(ll);
1000 ll->start = offset;
1001 ll->size = count;
1003 DLIST_ADD(llist, ll);
1006 * The following call calculates if there are any
1007 * overlapping locks held by this process on
1008 * fd's open on the same file and splits this list
1009 * into a list of lock ranges that do not overlap with existing
1010 * POSIX locks.
1013 llist = posix_lock_list(l_ctx,
1014 llist,
1015 lock_ctx, /* Lock context llist belongs to. */
1016 plocks,
1017 num_locks);
1020 * Add the POSIX locks on the list of ranges returned.
1021 * As the lock is supposed to be added atomically, we need to
1022 * back out all the locks if any one of these calls fail.
1025 for (lock_count = 0, ll = llist; ll; ll = ll->next, lock_count++) {
1026 offset = ll->start;
1027 count = ll->size;
1029 DEBUG(5, ("set_posix_lock_windows_flavour: Real lock: "
1030 "Type = %s: offset = %ju, count = %ju\n",
1031 posix_lock_type_name(posix_lock_type),
1032 (uintmax_t)offset, (uintmax_t)count ));
1034 if (!posix_fcntl_lock(fsp,F_SETLK,offset,count,posix_lock_type)) {
1035 *errno_ret = errno;
1036 DEBUG(5, ("set_posix_lock_windows_flavour: Lock "
1037 "fail !: Type = %s: offset = %ju, "
1038 "count = %ju. Errno = %s\n",
1039 posix_lock_type_name(posix_lock_type),
1040 (uintmax_t)offset, (uintmax_t)count,
1041 strerror(errno) ));
1042 ret = False;
1043 break;
1047 if (!ret) {
1050 * Back out all the POSIX locks we have on fail.
1053 for (ll = llist; lock_count; ll = ll->next, lock_count--) {
1054 offset = ll->start;
1055 count = ll->size;
1057 DEBUG(5, ("set_posix_lock_windows_flavour: Backing "
1058 "out locks: Type = %s: offset = %ju, "
1059 "count = %ju\n",
1060 posix_lock_type_name(posix_lock_type),
1061 (uintmax_t)offset, (uintmax_t)count ));
1063 posix_fcntl_lock(fsp,F_SETLK,offset,count,F_UNLCK);
1065 } else {
1066 /* Remember the number of locks we have on this dev/ino pair. */
1067 increment_lock_ref_count(fsp);
1070 talloc_destroy(l_ctx);
1071 return ret;
1074 /****************************************************************************
1075 POSIX function to release a lock. Returns True if the
1076 lock could be released, False if not.
1077 ****************************************************************************/
1079 bool release_posix_lock_windows_flavour(files_struct *fsp,
1080 uint64_t u_offset,
1081 uint64_t u_count,
1082 enum brl_type deleted_lock_type,
1083 const struct lock_context *lock_ctx,
1084 const struct lock_struct *plocks,
1085 int num_locks)
1087 off_t offset;
1088 off_t count;
1089 bool ret = True;
1090 TALLOC_CTX *ul_ctx = NULL;
1091 struct lock_list *ulist = NULL;
1092 struct lock_list *ul = NULL;
1094 DEBUG(5, ("release_posix_lock_windows_flavour: File %s, offset = %ju, "
1095 "count = %ju\n", fsp_str_dbg(fsp),
1096 (uintmax_t)u_offset, (uintmax_t)u_count));
1098 /* Remember the number of locks we have on this dev/ino pair. */
1099 decrement_lock_ref_count(fsp);
1102 * If the requested lock won't fit in the POSIX range, we will
1103 * pretend it was successful.
1106 if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
1107 return True;
1110 if ((ul_ctx = talloc_init("release_posix_lock")) == NULL) {
1111 DEBUG(0,("release_posix_lock_windows_flavour: unable to init talloc context.\n"));
1112 return False;
1115 if ((ul = talloc(ul_ctx, struct lock_list)) == NULL) {
1116 DEBUG(0,("release_posix_lock_windows_flavour: unable to talloc unlock list.\n"));
1117 talloc_destroy(ul_ctx);
1118 return False;
1122 * Create the initial list entry containing the
1123 * lock we want to remove.
1126 ZERO_STRUCTP(ul);
1127 ul->start = offset;
1128 ul->size = count;
1130 DLIST_ADD(ulist, ul);
1133 * The following call calculates if there are any
1134 * overlapping locks held by this process on
1135 * fd's open on the same file and creates a
1136 * list of unlock ranges that will allow
1137 * POSIX lock ranges to remain on the file whilst the
1138 * unlocks are performed.
1141 ulist = posix_lock_list(ul_ctx,
1142 ulist,
1143 lock_ctx, /* Lock context ulist belongs to. */
1144 plocks,
1145 num_locks);
1148 * If there were any overlapped entries (list is > 1 or size or start have changed),
1149 * and the lock_type we just deleted from
1150 * the upper layer tdb was a write lock, then before doing the unlock we need to downgrade
1151 * the POSIX lock to a read lock. This allows any overlapping read locks
1152 * to be atomically maintained.
1155 if (deleted_lock_type == WRITE_LOCK &&
1156 (!ulist || ulist->next != NULL || ulist->start != offset || ulist->size != count)) {
1158 DEBUG(5, ("release_posix_lock_windows_flavour: downgrading "
1159 "lock to READ: offset = %ju, count = %ju\n",
1160 (uintmax_t)offset, (uintmax_t)count ));
1162 if (!posix_fcntl_lock(fsp,F_SETLK,offset,count,F_RDLCK)) {
1163 DEBUG(0,("release_posix_lock_windows_flavour: downgrade of lock failed with error %s !\n", strerror(errno) ));
1164 talloc_destroy(ul_ctx);
1165 return False;
1170 * Release the POSIX locks on the list of ranges returned.
1173 for(; ulist; ulist = ulist->next) {
1174 offset = ulist->start;
1175 count = ulist->size;
1177 DEBUG(5, ("release_posix_lock_windows_flavour: Real unlock: "
1178 "offset = %ju, count = %ju\n",
1179 (uintmax_t)offset, (uintmax_t)count ));
1181 if (!posix_fcntl_lock(fsp,F_SETLK,offset,count,F_UNLCK)) {
1182 ret = False;
1186 talloc_destroy(ul_ctx);
1187 return ret;
1190 /****************************************************************************
1191 Next - the functions that deal with mapping CIFS POSIX locks onto
1192 the underlying system POSIX locks.
1193 ****************************************************************************/
1195 /****************************************************************************
1196 We only increment the lock ref count when we see a POSIX lock on a context
1197 that doesn't already have them.
1198 ****************************************************************************/
1200 static void increment_posix_lock_count(const files_struct *fsp,
1201 uint64_t smblctx)
1203 NTSTATUS status;
1204 TDB_DATA ctx_key;
1205 TDB_DATA val = { 0 };
1207 ctx_key.dptr = (uint8_t *)&smblctx;
1208 ctx_key.dsize = sizeof(smblctx);
1211 * Don't increment if we already have any POSIX flavor
1212 * locks on this context.
1214 if (dbwrap_exists(posix_pending_close_db, ctx_key)) {
1215 return;
1218 /* Remember that we have POSIX flavor locks on this context. */
1219 status = dbwrap_store(posix_pending_close_db, ctx_key, val, 0);
1220 SMB_ASSERT(NT_STATUS_IS_OK(status));
1222 increment_lock_ref_count(fsp);
1224 DEBUG(10,("posix_locks set for file %s\n",
1225 fsp_str_dbg(fsp)));
1228 static void decrement_posix_lock_count(const files_struct *fsp, uint64_t smblctx)
1230 NTSTATUS status;
1231 TDB_DATA ctx_key;
1233 ctx_key.dptr = (uint8_t *)&smblctx;
1234 ctx_key.dsize = sizeof(smblctx);
1236 status = dbwrap_delete(posix_pending_close_db, ctx_key);
1237 SMB_ASSERT(NT_STATUS_IS_OK(status));
1239 decrement_lock_ref_count(fsp);
1241 DEBUG(10,("posix_locks deleted for file %s\n",
1242 fsp_str_dbg(fsp)));
1245 /****************************************************************************
1246 Return true if any locks exist on the given lock context.
1247 ****************************************************************************/
1249 static bool locks_exist_on_context(const struct lock_struct *plocks,
1250 int num_locks,
1251 const struct lock_context *lock_ctx)
1253 int i;
1255 for (i=0; i < num_locks; i++) {
1256 const struct lock_struct *lock = &plocks[i];
1258 /* Ignore all but read/write locks. */
1259 if (lock->lock_type != READ_LOCK && lock->lock_type != WRITE_LOCK) {
1260 continue;
1263 /* Ignore locks not owned by this process. */
1264 if (!serverid_equal(&lock->context.pid, &lock_ctx->pid)) {
1265 continue;
1268 if (lock_ctx->smblctx == lock->context.smblctx) {
1269 return true;
1272 return false;
1275 /****************************************************************************
1276 POSIX function to acquire a lock. Returns True if the
1277 lock could be granted, False if not.
1278 As POSIX locks don't stack or conflict (they just overwrite)
1279 we can map the requested lock directly onto a system one. We
1280 know it doesn't conflict with locks on other contexts as the
1281 upper layer would have refused it.
1282 ****************************************************************************/
1284 bool set_posix_lock_posix_flavour(files_struct *fsp,
1285 uint64_t u_offset,
1286 uint64_t u_count,
1287 enum brl_type lock_type,
1288 const struct lock_context *lock_ctx,
1289 int *errno_ret)
1291 off_t offset;
1292 off_t count;
1293 int posix_lock_type = map_posix_lock_type(fsp,lock_type);
1295 DEBUG(5,("set_posix_lock_posix_flavour: File %s, offset = %ju, count "
1296 "= %ju, type = %s\n", fsp_str_dbg(fsp),
1297 (uintmax_t)u_offset, (uintmax_t)u_count,
1298 posix_lock_type_name(lock_type)));
1301 * If the requested lock won't fit in the POSIX range, we will
1302 * pretend it was successful.
1305 if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
1306 increment_posix_lock_count(fsp, lock_ctx->smblctx);
1307 return True;
1310 if (!posix_fcntl_lock(fsp,F_SETLK,offset,count,posix_lock_type)) {
1311 *errno_ret = errno;
1312 DEBUG(5,("set_posix_lock_posix_flavour: Lock fail !: Type = %s: offset = %ju, count = %ju. Errno = %s\n",
1313 posix_lock_type_name(posix_lock_type), (intmax_t)offset, (intmax_t)count, strerror(errno) ));
1314 return False;
1316 increment_posix_lock_count(fsp, lock_ctx->smblctx);
1317 return True;
1320 /****************************************************************************
1321 POSIX function to release a lock. Returns True if the
1322 lock could be released, False if not.
1323 We are given a complete lock state from the upper layer which is what the lock
1324 state should be after the unlock has already been done, so what
1325 we do is punch out holes in the unlock range where locks owned by this process
1326 have a different lock context.
1327 ****************************************************************************/
1329 bool release_posix_lock_posix_flavour(files_struct *fsp,
1330 uint64_t u_offset,
1331 uint64_t u_count,
1332 const struct lock_context *lock_ctx,
1333 const struct lock_struct *plocks,
1334 int num_locks)
1336 bool ret = True;
1337 off_t offset;
1338 off_t count;
1339 TALLOC_CTX *ul_ctx = NULL;
1340 struct lock_list *ulist = NULL;
1341 struct lock_list *ul = NULL;
1343 DEBUG(5, ("release_posix_lock_posix_flavour: File %s, offset = %ju, "
1344 "count = %ju\n", fsp_str_dbg(fsp),
1345 (uintmax_t)u_offset, (uintmax_t)u_count));
1348 * If the requested lock won't fit in the POSIX range, we will
1349 * pretend it was successful.
1352 if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
1353 if (!locks_exist_on_context(plocks, num_locks, lock_ctx)) {
1354 decrement_posix_lock_count(fsp, lock_ctx->smblctx);
1356 return True;
1359 if ((ul_ctx = talloc_init("release_posix_lock")) == NULL) {
1360 DEBUG(0,("release_posix_lock_windows_flavour: unable to init talloc context.\n"));
1361 return False;
1364 if ((ul = talloc(ul_ctx, struct lock_list)) == NULL) {
1365 DEBUG(0,("release_posix_lock_windows_flavour: unable to talloc unlock list.\n"));
1366 talloc_destroy(ul_ctx);
1367 return False;
1371 * Create the initial list entry containing the
1372 * lock we want to remove.
1375 ZERO_STRUCTP(ul);
1376 ul->start = offset;
1377 ul->size = count;
1379 DLIST_ADD(ulist, ul);
1382 * Walk the given array creating a linked list
1383 * of unlock requests.
1386 ulist = posix_lock_list(ul_ctx,
1387 ulist,
1388 lock_ctx, /* Lock context ulist belongs to. */
1389 plocks,
1390 num_locks);
1393 * Release the POSIX locks on the list of ranges returned.
1396 for(; ulist; ulist = ulist->next) {
1397 offset = ulist->start;
1398 count = ulist->size;
1400 DEBUG(5, ("release_posix_lock_posix_flavour: Real unlock: "
1401 "offset = %ju, count = %ju\n",
1402 (uintmax_t)offset, (uintmax_t)count ));
1404 if (!posix_fcntl_lock(fsp,F_SETLK,offset,count,F_UNLCK)) {
1405 ret = False;
1409 if (!locks_exist_on_context(plocks, num_locks, lock_ctx)) {
1410 decrement_posix_lock_count(fsp, lock_ctx->smblctx);
1412 talloc_destroy(ul_ctx);
1413 return ret;