smbXsrv_session: Remove a "can't happen" NULL check
[Samba.git] / source3 / locking / posix.c
blobeedbbc1c58b3a630d47f707587a90f69112abd3f
1 /*
2 Unix SMB/CIFS implementation.
3 Locking functions
4 Copyright (C) Jeremy Allison 1992-2006
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <http://www.gnu.org/licenses/>.
19 Revision History:
21 POSIX locking support. Jeremy Allison (jeremy@valinux.com), Apr. 2000.
24 #include "includes.h"
25 #include "system/filesys.h"
26 #include "lib/util/server_id.h"
27 #include "locking/proto.h"
28 #include "dbwrap/dbwrap.h"
29 #include "dbwrap/dbwrap_rbt.h"
30 #include "util_tdb.h"
31 #include "smbd/fd_handle.h"
33 #undef DBGC_CLASS
34 #define DBGC_CLASS DBGC_LOCKING
37 * The pending close database handle.
40 static struct db_context *posix_pending_close_db;
42 /****************************************************************************
43 First - the functions that deal with the underlying system locks - these
44 functions are used no matter if we're mapping CIFS Windows locks or CIFS
45 POSIX locks onto POSIX.
46 ****************************************************************************/
48 /****************************************************************************
49 Utility function to map a lock type correctly depending on the open
50 mode of a file.
51 ****************************************************************************/
53 static int map_posix_lock_type( files_struct *fsp, enum brl_type lock_type)
55 if ((lock_type == WRITE_LOCK) && !fsp->fsp_flags.can_write) {
57 * Many UNIX's cannot get a write lock on a file opened read-only.
58 * Win32 locking semantics allow this.
59 * Do the best we can and attempt a read-only lock.
61 DEBUG(10,("map_posix_lock_type: Downgrading write lock to read due to read-only file.\n"));
62 return F_RDLCK;
66 * This return should be the most normal, as we attempt
67 * to always open files read/write.
70 return (lock_type == READ_LOCK) ? F_RDLCK : F_WRLCK;
73 /****************************************************************************
74 Debugging aid :-).
75 ****************************************************************************/
/****************************************************************************
 Debugging aid: render an fcntl lock type as a human-readable string.
****************************************************************************/

static const char *posix_lock_type_name(int lock_type)
{
	if (lock_type == F_RDLCK) {
		return "READ";
	}
	return "WRITE";
}
82 /****************************************************************************
83 Check to see if the given unsigned lock range is within the possible POSIX
84 range. Modifies the given args to be in range if possible, just returns
85 False if not.
86 ****************************************************************************/
88 #define SMB_OFF_T_BITS (sizeof(off_t)*8)
90 static bool posix_lock_in_range(off_t *offset_out, off_t *count_out,
91 uint64_t u_offset, uint64_t u_count)
93 off_t offset = (off_t)u_offset;
94 off_t count = (off_t)u_count;
97 * For the type of system we are, attempt to
98 * find the maximum positive lock offset as an off_t.
101 #if defined(MAX_POSITIVE_LOCK_OFFSET) /* Some systems have arbitrary limits. */
103 off_t max_positive_lock_offset = (MAX_POSITIVE_LOCK_OFFSET);
104 #else
106 * In this case off_t is 64 bits,
107 * and the underlying system can handle 64 bit signed locks.
110 off_t mask2 = ((off_t)0x4) << (SMB_OFF_T_BITS-4);
111 off_t mask = (mask2<<1);
112 off_t max_positive_lock_offset = ~mask;
114 #endif
116 * POSIX locks of length zero mean lock to end-of-file.
117 * Win32 locks of length zero are point probes. Ignore
118 * any Win32 locks of length zero. JRA.
121 if (count == 0) {
122 DEBUG(10,("posix_lock_in_range: count = 0, ignoring.\n"));
123 return False;
127 * If the given offset was > max_positive_lock_offset then we cannot map this at all
128 * ignore this lock.
131 if (u_offset & ~((uint64_t)max_positive_lock_offset)) {
132 DEBUG(10, ("posix_lock_in_range: (offset = %ju) offset > %ju "
133 "and we cannot handle this. Ignoring lock.\n",
134 (uintmax_t)u_offset,
135 (uintmax_t)max_positive_lock_offset));
136 return False;
140 * We must truncate the count to less than max_positive_lock_offset.
143 if (u_count & ~((uint64_t)max_positive_lock_offset)) {
144 count = max_positive_lock_offset;
148 * Truncate count to end at max lock offset.
151 if (offset > INT64_MAX - count ||
152 offset + count > max_positive_lock_offset) {
153 count = max_positive_lock_offset - offset;
157 * If we ate all the count, ignore this lock.
160 if (count == 0) {
161 DEBUG(10, ("posix_lock_in_range: Count = 0. Ignoring lock "
162 "u_offset = %ju, u_count = %ju\n",
163 (uintmax_t)u_offset,
164 (uintmax_t)u_count));
165 return False;
169 * The mapping was successful.
172 DEBUG(10, ("posix_lock_in_range: offset_out = %ju, "
173 "count_out = %ju\n",
174 (uintmax_t)offset, (uintmax_t)count));
176 *offset_out = offset;
177 *count_out = count;
179 return True;
182 /****************************************************************************
183 Actual function that does POSIX locks. Copes with 64 -> 32 bit cruft and
184 broken NFS implementations.
185 ****************************************************************************/
187 static bool posix_fcntl_lock(files_struct *fsp, int op, off_t offset, off_t count, int type)
189 bool ret;
191 DEBUG(8,("posix_fcntl_lock %d %d %jd %jd %d\n",
192 fsp_get_io_fd(fsp),op,(intmax_t)offset,(intmax_t)count,type));
194 ret = SMB_VFS_LOCK(fsp, op, offset, count, type);
196 if (!ret && ((errno == EFBIG) || (errno == ENOLCK) || (errno == EINVAL))) {
198 if ((errno == EINVAL) &&
199 (op != F_GETLK &&
200 op != F_SETLK &&
201 op != F_SETLKW)) {
202 DEBUG(0,("WARNING: OFD locks in use and no kernel "
203 "support. Try setting "
204 "'smbd:force process locks = true' "
205 "in smb.conf\n"));
206 } else {
207 DEBUG(0, ("WARNING: lock request at offset "
208 "%ju, length %ju returned\n",
209 (uintmax_t)offset, (uintmax_t)count));
210 DEBUGADD(0, ("an %s error. This can happen when using 64 bit "
211 "lock offsets\n", strerror(errno)));
212 DEBUGADD(0, ("on 32 bit NFS mounted file systems.\n"));
216 * If the offset is > 0x7FFFFFFF then this will cause problems on
217 * 32 bit NFS mounted filesystems. Just ignore it.
220 if (offset & ~((off_t)0x7fffffff)) {
221 DEBUG(0,("Offset greater than 31 bits. Returning success.\n"));
222 return True;
225 if (count & ~((off_t)0x7fffffff)) {
226 /* 32 bit NFS file system, retry with smaller offset */
227 DEBUG(0,("Count greater than 31 bits - retrying with 31 bit truncated length.\n"));
228 errno = 0;
229 count &= 0x7fffffff;
230 ret = SMB_VFS_LOCK(fsp, op, offset, count, type);
234 DEBUG(8,("posix_fcntl_lock: Lock call %s\n", ret ? "successful" : "failed"));
235 return ret;
238 /****************************************************************************
239 Actual function that gets POSIX locks. Copes with 64 -> 32 bit cruft and
240 broken NFS implementations.
241 ****************************************************************************/
243 static bool posix_fcntl_getlock(files_struct *fsp, off_t *poffset, off_t *pcount, int *ptype)
245 pid_t pid;
246 bool ret;
248 DEBUG(8, ("posix_fcntl_getlock %d %ju %ju %d\n",
249 fsp_get_io_fd(fsp), (uintmax_t)*poffset, (uintmax_t)*pcount,
250 *ptype));
252 ret = SMB_VFS_GETLOCK(fsp, poffset, pcount, ptype, &pid);
254 if (!ret && ((errno == EFBIG) || (errno == ENOLCK) || (errno == EINVAL))) {
256 DEBUG(0, ("posix_fcntl_getlock: WARNING: lock request at "
257 "offset %ju, length %ju returned\n",
258 (uintmax_t)*poffset, (uintmax_t)*pcount));
259 DEBUGADD(0, ("an %s error. This can happen when using 64 bit "
260 "lock offsets\n", strerror(errno)));
261 DEBUGADD(0, ("on 32 bit NFS mounted file systems.\n"));
264 * If the offset is > 0x7FFFFFFF then this will cause problems on
265 * 32 bit NFS mounted filesystems. Just ignore it.
268 if (*poffset & ~((off_t)0x7fffffff)) {
269 DEBUG(0,("Offset greater than 31 bits. Returning success.\n"));
270 return True;
273 if (*pcount & ~((off_t)0x7fffffff)) {
274 /* 32 bit NFS file system, retry with smaller offset */
275 DEBUG(0,("Count greater than 31 bits - retrying with 31 bit truncated length.\n"));
276 errno = 0;
277 *pcount &= 0x7fffffff;
278 ret = SMB_VFS_GETLOCK(fsp,poffset,pcount,ptype,&pid);
282 DEBUG(8,("posix_fcntl_getlock: Lock query call %s\n", ret ? "successful" : "failed"));
283 return ret;
286 /****************************************************************************
287 POSIX function to see if a file region is locked. Returns True if the
288 region is locked, False otherwise.
289 ****************************************************************************/
291 bool is_posix_locked(files_struct *fsp,
292 uint64_t *pu_offset,
293 uint64_t *pu_count,
294 enum brl_type *plock_type,
295 enum brl_flavour lock_flav)
297 off_t offset;
298 off_t count;
299 int posix_lock_type = map_posix_lock_type(fsp,*plock_type);
301 DEBUG(10, ("is_posix_locked: File %s, offset = %ju, count = %ju, "
302 "type = %s\n", fsp_str_dbg(fsp), (uintmax_t)*pu_offset,
303 (uintmax_t)*pu_count, posix_lock_type_name(*plock_type)));
306 * If the requested lock won't fit in the POSIX range, we will
307 * never set it, so presume it is not locked.
310 if(!posix_lock_in_range(&offset, &count, *pu_offset, *pu_count)) {
311 return False;
314 if (!posix_fcntl_getlock(fsp,&offset,&count,&posix_lock_type)) {
315 return False;
318 if (posix_lock_type == F_UNLCK) {
319 return False;
322 if (lock_flav == POSIX_LOCK) {
323 /* Only POSIX lock queries need to know the details. */
324 *pu_offset = (uint64_t)offset;
325 *pu_count = (uint64_t)count;
326 *plock_type = (posix_lock_type == F_RDLCK) ? READ_LOCK : WRITE_LOCK;
328 return True;
331 /****************************************************************************
332 Next - the functions that deal with in memory database storing representations
333 of either Windows CIFS locks or POSIX CIFS locks.
334 ****************************************************************************/
336 /* The key used in the in-memory POSIX databases. */
/*
 * Key type for the lock-ref-count records in posix_pending_close_db.
 * The trailing 'r' byte distinguishes these keys from the fd-array
 * records, which are keyed by a bare file_id.
 */
struct lock_ref_count_key {
	struct file_id id;	/* dev/inode identity of the file */
	char r;			/* discriminator: always 'r' */
};
343 /*******************************************************************
344 Form a static locking key for a dev/inode pair for the lock ref count
345 ******************************************************************/
/*******************************************************************
 Form a static locking key for a dev/inode pair for the lock ref count.
 The key bytes are the whole struct, so the caller-supplied buffer is
 zeroed first (ZERO_STRUCTP also clears padding bytes, keeping the
 TDB key deterministic).
******************************************************************/

static TDB_DATA locking_ref_count_key_fsp(const files_struct *fsp,
					  struct lock_ref_count_key *tmp)
{
	ZERO_STRUCTP(tmp);
	tmp->id = fsp->file_id;
	tmp->r = 'r';
	return make_tdb_data((uint8_t *)tmp, sizeof(*tmp));
}
356 /*******************************************************************
357 Convenience function to get an fd_array key from an fsp.
358 ******************************************************************/
360 static TDB_DATA fd_array_key_fsp(const files_struct *fsp)
362 return make_tdb_data((const uint8_t *)&fsp->file_id, sizeof(fsp->file_id));
365 /*******************************************************************
366 Create the in-memory POSIX lock databases.
367 ********************************************************************/
369 bool posix_locking_init(bool read_only)
371 if (posix_pending_close_db != NULL) {
372 return true;
375 posix_pending_close_db = db_open_rbt(NULL);
377 if (posix_pending_close_db == NULL) {
378 DEBUG(0,("Failed to open POSIX pending close database.\n"));
379 return false;
382 return true;
385 /*******************************************************************
386 Delete the in-memory POSIX lock databases.
387 ********************************************************************/
389 bool posix_locking_end(void)
392 * Shouldn't we close all fd's here?
394 TALLOC_FREE(posix_pending_close_db);
395 return true;
398 /****************************************************************************
399 Next - the functions that deal with reference count of number of locks open
400 on a dev/ino pair.
401 ****************************************************************************/
403 /****************************************************************************
404 Increase the lock ref count. Creates lock_ref_count entry if it doesn't exist.
405 ****************************************************************************/
407 static void increment_lock_ref_count(const files_struct *fsp)
409 struct lock_ref_count_key tmp;
410 int32_t lock_ref_count = 0;
411 NTSTATUS status;
413 status = dbwrap_change_int32_atomic(
414 posix_pending_close_db, locking_ref_count_key_fsp(fsp, &tmp),
415 &lock_ref_count, 1);
417 SMB_ASSERT(NT_STATUS_IS_OK(status));
418 SMB_ASSERT(lock_ref_count < INT32_MAX);
420 DEBUG(10,("lock_ref_count for file %s = %d\n",
421 fsp_str_dbg(fsp), (int)(lock_ref_count + 1)));
424 /****************************************************************************
425 Reduce the lock ref count.
426 ****************************************************************************/
428 static void decrement_lock_ref_count(const files_struct *fsp)
430 struct lock_ref_count_key tmp;
431 int32_t lock_ref_count = 0;
432 NTSTATUS status;
434 status = dbwrap_change_int32_atomic(
435 posix_pending_close_db, locking_ref_count_key_fsp(fsp, &tmp),
436 &lock_ref_count, -1);
438 SMB_ASSERT(NT_STATUS_IS_OK(status));
439 SMB_ASSERT(lock_ref_count > 0);
441 DEBUG(10,("lock_ref_count for file %s = %d\n",
442 fsp_str_dbg(fsp), (int)(lock_ref_count - 1)));
445 /****************************************************************************
446 Fetch the lock ref count.
447 ****************************************************************************/
449 static int32_t get_lock_ref_count(const files_struct *fsp)
451 struct lock_ref_count_key tmp;
452 NTSTATUS status;
453 int32_t lock_ref_count = 0;
455 status = dbwrap_fetch_int32(
456 posix_pending_close_db, locking_ref_count_key_fsp(fsp, &tmp),
457 &lock_ref_count);
459 if (!NT_STATUS_IS_OK(status) &&
460 !NT_STATUS_EQUAL(status, NT_STATUS_NOT_FOUND)) {
461 DEBUG(0, ("Error fetching "
462 "lock ref count for file %s: %s\n",
463 fsp_str_dbg(fsp), nt_errstr(status)));
465 return lock_ref_count;
468 /****************************************************************************
469 Delete a lock_ref_count entry.
470 ****************************************************************************/
472 static void delete_lock_ref_count(const files_struct *fsp)
474 struct lock_ref_count_key tmp;
476 /* Not a bug if it doesn't exist - no locks were ever granted. */
478 dbwrap_delete(posix_pending_close_db,
479 locking_ref_count_key_fsp(fsp, &tmp));
481 DEBUG(10,("delete_lock_ref_count for file %s\n",
482 fsp_str_dbg(fsp)));
485 /****************************************************************************
486 Next - the functions that deal with storing fd's that have outstanding
487 POSIX locks when closed.
488 ****************************************************************************/
490 /****************************************************************************
491 The records in posix_pending_close_db are composed of an array of
492 ints keyed by dev/ino pair. Those ints are the fd's that were open on
493 this dev/ino pair that should have been closed, but can't as the lock
494 ref count is non zero.
495 ****************************************************************************/
/* State passed through dbwrap_do_locked() to add_fd_to_close_entry_fn():
 * carries the fsp whose pathref fd is appended to the pending-close
 * fd array. */
struct add_fd_to_close_entry_state {
	const struct files_struct *fsp;
};
501 static void add_fd_to_close_entry_fn(
502 struct db_record *rec,
503 TDB_DATA value,
504 void *private_data)
506 struct add_fd_to_close_entry_state *state = private_data;
507 int fd = fsp_get_pathref_fd(state->fsp);
508 TDB_DATA values[] = {
509 value,
510 { .dptr = (uint8_t *)&fd,
511 .dsize = sizeof(fd) },
513 NTSTATUS status;
515 SMB_ASSERT((values[0].dsize % sizeof(int)) == 0);
517 status = dbwrap_record_storev(rec, values, ARRAY_SIZE(values), 0);
518 SMB_ASSERT(NT_STATUS_IS_OK(status));
521 /****************************************************************************
522 Add an fd to the pending close db.
523 ****************************************************************************/
525 static void add_fd_to_close_entry(const files_struct *fsp)
527 struct add_fd_to_close_entry_state state = { .fsp = fsp };
528 NTSTATUS status;
530 status = dbwrap_do_locked(
531 posix_pending_close_db,
532 fd_array_key_fsp(fsp),
533 add_fd_to_close_entry_fn,
534 &state);
535 SMB_ASSERT(NT_STATUS_IS_OK(status));
537 DBG_DEBUG("added fd %d file %s\n",
538 fsp_get_pathref_fd(fsp),
539 fsp_str_dbg(fsp));
542 static void fd_close_posix_fn(
543 struct db_record *rec,
544 TDB_DATA data,
545 void *private_data)
547 int *saved_errno = (int *)private_data;
548 size_t num_fds, i;
550 SMB_ASSERT((data.dsize % sizeof(int)) == 0);
551 num_fds = data.dsize / sizeof(int);
553 for (i=0; i<num_fds; i++) {
554 int fd;
555 int ret;
556 memcpy(&fd, data.dptr, sizeof(int));
557 ret = close(fd);
558 if (ret == -1) {
559 *saved_errno = errno;
561 data.dptr += sizeof(int);
563 dbwrap_record_delete(rec);
566 /****************************************************************************
567 Deal with pending closes needed by POSIX locking support.
568 Note that locking_close_file() is expected to have been called
569 to delete all locks on this fsp before this function is called.
570 ****************************************************************************/
572 int fd_close_posix(const struct files_struct *fsp)
574 int saved_errno = 0;
575 int ret;
576 NTSTATUS status;
578 if (!lp_locking(fsp->conn->params) ||
579 !lp_posix_locking(fsp->conn->params) ||
580 fsp->fsp_flags.use_ofd_locks)
583 * No locking or POSIX to worry about or we are using POSIX
584 * open file description lock semantics which only removes
585 * locks on the file descriptor we're closing. Just close.
587 return close(fsp_get_pathref_fd(fsp));
590 if (get_lock_ref_count(fsp)) {
593 * There are outstanding locks on this dev/inode pair on
594 * other fds. Add our fd to the pending close db. We also
595 * set fsp_get_io_fd(fsp) to -1 inside fd_close() after returning
596 * from VFS layer.
599 add_fd_to_close_entry(fsp);
600 return 0;
603 status = dbwrap_do_locked(
604 posix_pending_close_db,
605 fd_array_key_fsp(fsp),
606 fd_close_posix_fn,
607 &saved_errno);
608 if (!NT_STATUS_IS_OK(status)) {
609 DBG_WARNING("dbwrap_do_locked failed: %s\n",
610 nt_errstr(status));
613 /* Don't need a lock ref count on this dev/ino anymore. */
614 delete_lock_ref_count(fsp);
617 * Finally close the fd associated with this fsp.
620 ret = close(fsp_get_pathref_fd(fsp));
622 if (ret == 0 && saved_errno != 0) {
623 errno = saved_errno;
624 ret = -1;
627 return ret;
630 /****************************************************************************
631 Next - the functions that deal with the mapping CIFS Windows locks onto
632 the underlying system POSIX locks.
633 ****************************************************************************/
636 * Structure used when splitting a lock range
637 * into a POSIX lock range. Doubly linked list.
/*
 * Structure used when splitting a lock range
 * into a POSIX lock range. Doubly linked list.
 */
struct lock_list {
	struct lock_list *next;
	struct lock_list *prev;
	off_t start;	/* first byte of the range */
	off_t size;	/* length of the range in bytes */
};
647 /****************************************************************************
648 Create a list of lock ranges that don't overlap a given range. Used in calculating
649 POSIX locks and unlocks. This is a difficult function that requires ASCII art to
650 understand it :-).
651 ****************************************************************************/
653 static struct lock_list *posix_lock_list(TALLOC_CTX *ctx,
654 struct lock_list *lhead,
655 const struct lock_context *lock_ctx, /* Lock context lhead belongs to. */
656 const struct lock_struct *plocks,
657 int num_locks)
659 int i;
662 * Check the current lock list on this dev/inode pair.
663 * Quit if the list is deleted.
666 DEBUG(10, ("posix_lock_list: curr: start=%ju,size=%ju\n",
667 (uintmax_t)lhead->start, (uintmax_t)lhead->size ));
669 for (i=0; i<num_locks && lhead; i++) {
670 const struct lock_struct *lock = &plocks[i];
671 struct lock_list *l_curr;
673 /* Ignore all but read/write locks. */
674 if (lock->lock_type != READ_LOCK && lock->lock_type != WRITE_LOCK) {
675 continue;
678 /* Ignore locks not owned by this process. */
679 if (!server_id_equal(&lock->context.pid, &lock_ctx->pid)) {
680 continue;
684 * Walk the lock list, checking for overlaps. Note that
685 * the lock list can expand within this loop if the current
686 * range being examined needs to be split.
689 for (l_curr = lhead; l_curr;) {
691 DEBUG(10, ("posix_lock_list: lock: fnum=%ju: "
692 "start=%ju,size=%ju:type=%s",
693 (uintmax_t)lock->fnum,
694 (uintmax_t)lock->start,
695 (uintmax_t)lock->size,
696 posix_lock_type_name(lock->lock_type) ));
698 if ( (l_curr->start >= (lock->start + lock->size)) ||
699 (lock->start >= (l_curr->start + l_curr->size))) {
701 /* No overlap with existing lock - leave this range alone. */
702 /*********************************************
703 +---------+
704 | l_curr |
705 +---------+
706 +-------+
707 | lock |
708 +-------+
709 OR....
710 +---------+
711 | l_curr |
712 +---------+
713 **********************************************/
715 DEBUG(10,(" no overlap case.\n" ));
717 l_curr = l_curr->next;
719 } else if ( (l_curr->start >= lock->start) &&
720 (l_curr->start + l_curr->size <= lock->start + lock->size) ) {
723 * This range is completely overlapped by this existing lock range
724 * and thus should have no effect. Delete it from the list.
726 /*********************************************
727 +---------+
728 | l_curr |
729 +---------+
730 +---------------------------+
731 | lock |
732 +---------------------------+
733 **********************************************/
734 /* Save the next pointer */
735 struct lock_list *ul_next = l_curr->next;
737 DEBUG(10,(" delete case.\n" ));
739 DLIST_REMOVE(lhead, l_curr);
740 if(lhead == NULL) {
741 break; /* No more list... */
744 l_curr = ul_next;
746 } else if ( (l_curr->start >= lock->start) &&
747 (l_curr->start < lock->start + lock->size) &&
748 (l_curr->start + l_curr->size > lock->start + lock->size) ) {
751 * This range overlaps the existing lock range at the high end.
752 * Truncate by moving start to existing range end and reducing size.
754 /*********************************************
755 +---------------+
756 | l_curr |
757 +---------------+
758 +---------------+
759 | lock |
760 +---------------+
761 BECOMES....
762 +-------+
763 | l_curr|
764 +-------+
765 **********************************************/
767 l_curr->size = (l_curr->start + l_curr->size) - (lock->start + lock->size);
768 l_curr->start = lock->start + lock->size;
770 DEBUG(10, (" truncate high case: start=%ju,"
771 "size=%ju\n",
772 (uintmax_t)l_curr->start,
773 (uintmax_t)l_curr->size ));
775 l_curr = l_curr->next;
777 } else if ( (l_curr->start < lock->start) &&
778 (l_curr->start + l_curr->size > lock->start) &&
779 (l_curr->start + l_curr->size <= lock->start + lock->size) ) {
782 * This range overlaps the existing lock range at the low end.
783 * Truncate by reducing size.
785 /*********************************************
786 +---------------+
787 | l_curr |
788 +---------------+
789 +---------------+
790 | lock |
791 +---------------+
792 BECOMES....
793 +-------+
794 | l_curr|
795 +-------+
796 **********************************************/
798 l_curr->size = lock->start - l_curr->start;
800 DEBUG(10, (" truncate low case: start=%ju,"
801 "size=%ju\n",
802 (uintmax_t)l_curr->start,
803 (uintmax_t)l_curr->size ));
805 l_curr = l_curr->next;
807 } else if ( (l_curr->start < lock->start) &&
808 (l_curr->start + l_curr->size > lock->start + lock->size) ) {
810 * Worst case scenario. Range completely overlaps an existing
811 * lock range. Split the request into two, push the new (upper) request
812 * into the dlink list, and continue with the entry after l_new (as we
813 * know that l_new will not overlap with this lock).
815 /*********************************************
816 +---------------------------+
817 | l_curr |
818 +---------------------------+
819 +---------+
820 | lock |
821 +---------+
822 BECOMES.....
823 +-------+ +---------+
824 | l_curr| | l_new |
825 +-------+ +---------+
826 **********************************************/
827 struct lock_list *l_new = talloc(ctx, struct lock_list);
829 if(l_new == NULL) {
830 DEBUG(0,("posix_lock_list: talloc fail.\n"));
831 return NULL; /* The talloc_destroy takes care of cleanup. */
834 ZERO_STRUCTP(l_new);
835 l_new->start = lock->start + lock->size;
836 l_new->size = l_curr->start + l_curr->size - l_new->start;
838 /* Truncate the l_curr. */
839 l_curr->size = lock->start - l_curr->start;
841 DEBUG(10, (" split case: curr: start=%ju,"
842 "size=%ju new: start=%ju,"
843 "size=%ju\n",
844 (uintmax_t)l_curr->start,
845 (uintmax_t)l_curr->size,
846 (uintmax_t)l_new->start,
847 (uintmax_t)l_new->size ));
850 * Add into the dlink list after the l_curr point - NOT at lhead.
852 DLIST_ADD_AFTER(lhead, l_new, l_curr);
854 /* And move after the link we added. */
855 l_curr = l_new->next;
857 } else {
860 * This logic case should never happen. Ensure this is the
861 * case by forcing an abort.... Remove in production.
863 char *msg = NULL;
865 if (asprintf(&msg, "logic flaw in cases: "
866 "l_curr: start = %ju, "
867 "size = %ju : lock: "
868 "start = %ju, size = %ju",
869 (uintmax_t)l_curr->start,
870 (uintmax_t)l_curr->size,
871 (uintmax_t)lock->start,
872 (uintmax_t)lock->size ) != -1) {
873 smb_panic(msg);
874 } else {
875 smb_panic("posix_lock_list");
878 } /* end for ( l_curr = lhead; l_curr;) */
879 } /* end for (i=0; i<num_locks && ul_head; i++) */
881 return lhead;
884 /****************************************************************************
885 POSIX function to acquire a lock. Returns True if the
886 lock could be granted, False if not.
887 ****************************************************************************/
889 bool set_posix_lock_windows_flavour(files_struct *fsp,
890 uint64_t u_offset,
891 uint64_t u_count,
892 enum brl_type lock_type,
893 const struct lock_context *lock_ctx,
894 const struct lock_struct *plocks,
895 int num_locks,
896 int *errno_ret)
898 off_t offset;
899 off_t count;
900 int posix_lock_type = map_posix_lock_type(fsp,lock_type);
901 bool ret = True;
902 size_t lock_count;
903 TALLOC_CTX *l_ctx = NULL;
904 struct lock_list *llist = NULL;
905 struct lock_list *ll = NULL;
907 DEBUG(5, ("set_posix_lock_windows_flavour: File %s, offset = %ju, "
908 "count = %ju, type = %s\n", fsp_str_dbg(fsp),
909 (uintmax_t)u_offset, (uintmax_t)u_count,
910 posix_lock_type_name(lock_type)));
913 * If the requested lock won't fit in the POSIX range, we will
914 * pretend it was successful.
917 if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
918 increment_lock_ref_count(fsp);
919 return True;
923 * Windows is very strange. It allows read locks to be overlaid
924 * (even over a write lock), but leaves the write lock in force until the first
925 * unlock. It also reference counts the locks. This means the following sequence :
927 * process1 process2
928 * ------------------------------------------------------------------------
929 * WRITE LOCK : start = 2, len = 10
930 * READ LOCK: start =0, len = 10 - FAIL
931 * READ LOCK : start = 0, len = 14
932 * READ LOCK: start =0, len = 10 - FAIL
933 * UNLOCK : start = 2, len = 10
934 * READ LOCK: start =0, len = 10 - OK
936 * Under POSIX, the same sequence in steps 1 and 2 would not be reference counted, but
937 * would leave a single read lock over the 0-14 region.
940 if ((l_ctx = talloc_init("set_posix_lock")) == NULL) {
941 DEBUG(0,("set_posix_lock_windows_flavour: unable to init talloc context.\n"));
942 return False;
945 if ((ll = talloc(l_ctx, struct lock_list)) == NULL) {
946 DEBUG(0,("set_posix_lock_windows_flavour: unable to talloc unlock list.\n"));
947 talloc_destroy(l_ctx);
948 return False;
952 * Create the initial list entry containing the
953 * lock we want to add.
956 ZERO_STRUCTP(ll);
957 ll->start = offset;
958 ll->size = count;
960 DLIST_ADD(llist, ll);
963 * The following call calculates if there are any
964 * overlapping locks held by this process on
965 * fd's open on the same file and splits this list
966 * into a list of lock ranges that do not overlap with existing
967 * POSIX locks.
970 llist = posix_lock_list(l_ctx,
971 llist,
972 lock_ctx, /* Lock context llist belongs to. */
973 plocks,
974 num_locks);
977 * Add the POSIX locks on the list of ranges returned.
978 * As the lock is supposed to be added atomically, we need to
979 * back out all the locks if any one of these calls fail.
982 for (lock_count = 0, ll = llist; ll; ll = ll->next, lock_count++) {
983 offset = ll->start;
984 count = ll->size;
986 DEBUG(5, ("set_posix_lock_windows_flavour: Real lock: "
987 "Type = %s: offset = %ju, count = %ju\n",
988 posix_lock_type_name(posix_lock_type),
989 (uintmax_t)offset, (uintmax_t)count ));
991 if (!posix_fcntl_lock(fsp,F_SETLK,offset,count,posix_lock_type)) {
992 *errno_ret = errno;
993 DEBUG(5, ("set_posix_lock_windows_flavour: Lock "
994 "fail !: Type = %s: offset = %ju, "
995 "count = %ju. Errno = %s\n",
996 posix_lock_type_name(posix_lock_type),
997 (uintmax_t)offset, (uintmax_t)count,
998 strerror(errno) ));
999 ret = False;
1000 break;
1004 if (!ret) {
1007 * Back out all the POSIX locks we have on fail.
1010 for (ll = llist; lock_count; ll = ll->next, lock_count--) {
1011 offset = ll->start;
1012 count = ll->size;
1014 DEBUG(5, ("set_posix_lock_windows_flavour: Backing "
1015 "out locks: Type = %s: offset = %ju, "
1016 "count = %ju\n",
1017 posix_lock_type_name(posix_lock_type),
1018 (uintmax_t)offset, (uintmax_t)count ));
1020 posix_fcntl_lock(fsp,F_SETLK,offset,count,F_UNLCK);
1022 } else {
1023 /* Remember the number of locks we have on this dev/ino pair. */
1024 increment_lock_ref_count(fsp);
1027 talloc_destroy(l_ctx);
1028 return ret;
/****************************************************************************
 POSIX function to release a lock. Returns True if the
 lock could be released, False if not.

 Windows-flavour unlock: the upper (brlock) layer has already removed its
 record of the lock; here we must drop the matching fcntl() range without
 disturbing overlapping POSIX ranges still held by this process on other
 fd's open on the same file.
****************************************************************************/

bool release_posix_lock_windows_flavour(files_struct *fsp,
				uint64_t u_offset,
				uint64_t u_count,
				enum brl_type deleted_lock_type,
				const struct lock_context *lock_ctx,
				const struct lock_struct *plocks,
				int num_locks)
{
	off_t offset;
	off_t count;
	bool ret = True;
	TALLOC_CTX *ul_ctx = NULL;
	struct lock_list *ulist = NULL;
	struct lock_list *ul = NULL;

	DEBUG(5, ("release_posix_lock_windows_flavour: File %s, offset = %ju, "
		  "count = %ju\n", fsp_str_dbg(fsp),
		  (uintmax_t)u_offset, (uintmax_t)u_count));

	/* Remember the number of locks we have on this dev/ino pair. */
	decrement_lock_ref_count(fsp);

	/*
	 * If the requested lock won't fit in the POSIX range, we will
	 * pretend it was successful.
	 */

	if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
		return True;
	}

	if ((ul_ctx = talloc_init("release_posix_lock")) == NULL) {
		DEBUG(0,("release_posix_lock_windows_flavour: unable to init talloc context.\n"));
		return False;
	}

	if ((ul = talloc(ul_ctx, struct lock_list)) == NULL) {
		DEBUG(0,("release_posix_lock_windows_flavour: unable to talloc unlock list.\n"));
		talloc_destroy(ul_ctx);
		return False;
	}

	/*
	 * Create the initial list entry containing the
	 * lock we want to remove.
	 */

	ZERO_STRUCTP(ul);
	ul->start = offset;
	ul->size = count;

	DLIST_ADD(ulist, ul);

	/*
	 * The following call calculates if there are any
	 * overlapping locks held by this process on
	 * fd's open on the same file and creates a
	 * list of unlock ranges that will allow
	 * POSIX lock ranges to remain on the file whilst the
	 * unlocks are performed.
	 */

	ulist = posix_lock_list(ul_ctx,
				ulist,
				lock_ctx, /* Lock context ulist belongs to. */
				plocks,
				num_locks);

	/*
	 * If there were any overlapped entries (list is > 1 or size or start have changed),
	 * and the lock_type we just deleted from
	 * the upper layer tdb was a write lock, then before doing the unlock we need to downgrade
	 * the POSIX lock to a read lock. This allows any overlapping read locks
	 * to be atomically maintained.
	 *
	 * NOTE: the downgrade MUST happen before the unlock loop below — the
	 * ordering is what keeps overlapping read locks continuously valid.
	 */

	if (deleted_lock_type == WRITE_LOCK &&
	    (!ulist || ulist->next != NULL || ulist->start != offset || ulist->size != count)) {

		DEBUG(5, ("release_posix_lock_windows_flavour: downgrading "
			  "lock to READ: offset = %ju, count = %ju\n",
			  (uintmax_t)offset, (uintmax_t)count ));

		if (!posix_fcntl_lock(fsp,F_SETLK,offset,count,F_RDLCK)) {
			DEBUG(0,("release_posix_lock_windows_flavour: downgrade of lock failed with error %s !\n", strerror(errno) ));
			talloc_destroy(ul_ctx);
			return False;
		}
	}

	/*
	 * Release the POSIX locks on the list of ranges returned.
	 */

	for(; ulist; ulist = ulist->next) {
		offset = ulist->start;
		count = ulist->size;

		DEBUG(5, ("release_posix_lock_windows_flavour: Real unlock: "
			  "offset = %ju, count = %ju\n",
			  (uintmax_t)offset, (uintmax_t)count ));

		if (!posix_fcntl_lock(fsp,F_SETLK,offset,count,F_UNLCK)) {
			/* Keep unlocking remaining ranges, but report failure. */
			ret = False;
		}
	}

	talloc_destroy(ul_ctx);
	return ret;
}
1147 /****************************************************************************
1148 Next - the functions that deal with mapping CIFS POSIX locks onto
1149 the underlying system POSIX locks.
1150 ****************************************************************************/
1152 /****************************************************************************
1153 We only increment the lock ref count when we see a POSIX lock on a context
1154 that doesn't already have them.
1155 ****************************************************************************/
1157 static void increment_posix_lock_count(const files_struct *fsp,
1158 uint64_t smblctx)
1160 NTSTATUS status;
1161 TDB_DATA ctx_key;
1162 TDB_DATA val = { 0 };
1164 ctx_key.dptr = (uint8_t *)&smblctx;
1165 ctx_key.dsize = sizeof(smblctx);
1168 * Don't increment if we already have any POSIX flavor
1169 * locks on this context.
1171 if (dbwrap_exists(posix_pending_close_db, ctx_key)) {
1172 return;
1175 /* Remember that we have POSIX flavor locks on this context. */
1176 status = dbwrap_store(posix_pending_close_db, ctx_key, val, 0);
1177 SMB_ASSERT(NT_STATUS_IS_OK(status));
1179 increment_lock_ref_count(fsp);
1181 DEBUG(10,("posix_locks set for file %s\n",
1182 fsp_str_dbg(fsp)));
1185 static void decrement_posix_lock_count(const files_struct *fsp, uint64_t smblctx)
1187 NTSTATUS status;
1188 TDB_DATA ctx_key;
1190 ctx_key.dptr = (uint8_t *)&smblctx;
1191 ctx_key.dsize = sizeof(smblctx);
1193 status = dbwrap_delete(posix_pending_close_db, ctx_key);
1194 SMB_ASSERT(NT_STATUS_IS_OK(status));
1196 decrement_lock_ref_count(fsp);
1198 DEBUG(10,("posix_locks deleted for file %s\n",
1199 fsp_str_dbg(fsp)));
1202 /****************************************************************************
1203 Return true if any locks exist on the given lock context.
1204 ****************************************************************************/
1206 static bool locks_exist_on_context(const struct lock_struct *plocks,
1207 int num_locks,
1208 const struct lock_context *lock_ctx)
1210 int i;
1212 for (i=0; i < num_locks; i++) {
1213 const struct lock_struct *lock = &plocks[i];
1215 /* Ignore all but read/write locks. */
1216 if (lock->lock_type != READ_LOCK && lock->lock_type != WRITE_LOCK) {
1217 continue;
1220 /* Ignore locks not owned by this process. */
1221 if (!server_id_equal(&lock->context.pid, &lock_ctx->pid)) {
1222 continue;
1225 if (lock_ctx->smblctx == lock->context.smblctx) {
1226 return true;
1229 return false;
1232 /****************************************************************************
1233 POSIX function to acquire a lock. Returns True if the
1234 lock could be granted, False if not.
1235 As POSIX locks don't stack or conflict (they just overwrite)
1236 we can map the requested lock directly onto a system one. We
1237 know it doesn't conflict with locks on other contexts as the
1238 upper layer would have refused it.
1239 ****************************************************************************/
1241 bool set_posix_lock_posix_flavour(files_struct *fsp,
1242 uint64_t u_offset,
1243 uint64_t u_count,
1244 enum brl_type lock_type,
1245 const struct lock_context *lock_ctx,
1246 int *errno_ret)
1248 off_t offset;
1249 off_t count;
1250 int posix_lock_type = map_posix_lock_type(fsp,lock_type);
1252 DEBUG(5,("set_posix_lock_posix_flavour: File %s, offset = %ju, count "
1253 "= %ju, type = %s\n", fsp_str_dbg(fsp),
1254 (uintmax_t)u_offset, (uintmax_t)u_count,
1255 posix_lock_type_name(lock_type)));
1258 * If the requested lock won't fit in the POSIX range, we will
1259 * pretend it was successful.
1262 if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
1263 increment_posix_lock_count(fsp, lock_ctx->smblctx);
1264 return True;
1267 if (!posix_fcntl_lock(fsp,F_SETLK,offset,count,posix_lock_type)) {
1268 *errno_ret = errno;
1269 DEBUG(5,("set_posix_lock_posix_flavour: Lock fail !: Type = %s: offset = %ju, count = %ju. Errno = %s\n",
1270 posix_lock_type_name(posix_lock_type), (intmax_t)offset, (intmax_t)count, strerror(errno) ));
1271 return False;
1273 increment_posix_lock_count(fsp, lock_ctx->smblctx);
1274 return True;
1277 /****************************************************************************
1278 POSIX function to release a lock. Returns True if the
1279 lock could be released, False if not.
1280 We are given a complete lock state from the upper layer which is what the lock
1281 state should be after the unlock has already been done, so what
1282 we do is punch out holes in the unlock range where locks owned by this process
1283 have a different lock context.
1284 ****************************************************************************/
1286 bool release_posix_lock_posix_flavour(files_struct *fsp,
1287 uint64_t u_offset,
1288 uint64_t u_count,
1289 const struct lock_context *lock_ctx,
1290 const struct lock_struct *plocks,
1291 int num_locks)
1293 bool ret = True;
1294 off_t offset;
1295 off_t count;
1296 TALLOC_CTX *ul_ctx = NULL;
1297 struct lock_list *ulist = NULL;
1298 struct lock_list *ul = NULL;
1300 DEBUG(5, ("release_posix_lock_posix_flavour: File %s, offset = %ju, "
1301 "count = %ju\n", fsp_str_dbg(fsp),
1302 (uintmax_t)u_offset, (uintmax_t)u_count));
1305 * If the requested lock won't fit in the POSIX range, we will
1306 * pretend it was successful.
1309 if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
1310 if (!locks_exist_on_context(plocks, num_locks, lock_ctx)) {
1311 decrement_posix_lock_count(fsp, lock_ctx->smblctx);
1313 return True;
1316 if ((ul_ctx = talloc_init("release_posix_lock")) == NULL) {
1317 DEBUG(0,("release_posix_lock_windows_flavour: unable to init talloc context.\n"));
1318 return False;
1321 if ((ul = talloc(ul_ctx, struct lock_list)) == NULL) {
1322 DEBUG(0,("release_posix_lock_windows_flavour: unable to talloc unlock list.\n"));
1323 talloc_destroy(ul_ctx);
1324 return False;
1328 * Create the initial list entry containing the
1329 * lock we want to remove.
1332 ZERO_STRUCTP(ul);
1333 ul->start = offset;
1334 ul->size = count;
1336 DLIST_ADD(ulist, ul);
1339 * Walk the given array creating a linked list
1340 * of unlock requests.
1343 ulist = posix_lock_list(ul_ctx,
1344 ulist,
1345 lock_ctx, /* Lock context ulist belongs to. */
1346 plocks,
1347 num_locks);
1350 * Release the POSIX locks on the list of ranges returned.
1353 for(; ulist; ulist = ulist->next) {
1354 offset = ulist->start;
1355 count = ulist->size;
1357 DEBUG(5, ("release_posix_lock_posix_flavour: Real unlock: "
1358 "offset = %ju, count = %ju\n",
1359 (uintmax_t)offset, (uintmax_t)count ));
1361 if (!posix_fcntl_lock(fsp,F_SETLK,offset,count,F_UNLCK)) {
1362 ret = False;
1366 if (!locks_exist_on_context(plocks, num_locks, lock_ctx)) {
1367 decrement_posix_lock_count(fsp, lock_ctx->smblctx);
1369 talloc_destroy(ul_ctx);
1370 return ret;