/*
   Unix SMB/CIFS implementation.
   Locking functions
   Copyright (C) Jeremy Allison 1992-2006

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.

   Revision History:

   POSIX locking support. Jeremy Allison (jeremy@valinux.com), Apr. 2000.
*/
#include "includes.h"
#include "system/filesys.h"
#include "lib/util/server_id.h"
#include "locking/proto.h"
#include "dbwrap/dbwrap.h"
#include "dbwrap/dbwrap_rbt.h"
#include "util_tdb.h"
#include "smbd/fd_handle.h"

#undef DBGC_CLASS
#define DBGC_CLASS DBGC_LOCKING

/*
 * The pending close database handle.
 */

static struct db_context *posix_pending_close_db;
/****************************************************************************
 First - the functions that deal with the underlying system locks - these
 functions are used no matter if we're mapping CIFS Windows locks or CIFS
 POSIX locks onto POSIX.
****************************************************************************/

/****************************************************************************
 Utility function to map a lock type correctly depending on the open
 mode of a file.
****************************************************************************/
static int map_posix_lock_type( files_struct *fsp, enum brl_type lock_type)
{
        if ((lock_type == WRITE_LOCK) && !fsp->fsp_flags.can_write) {
                /*
                 * Many UNIX's cannot get a write lock on a file opened read-only.
                 * Win32 locking semantics allow this.
                 * Do the best we can and attempt a read-only lock.
                 */
                DEBUG(10,("map_posix_lock_type: Downgrading write lock to read due to read-only file.\n"));
                return F_RDLCK;
        }

        /*
         * This return should be the most normal, as we attempt
         * to always open files read/write.
         */

        return (lock_type == READ_LOCK) ? F_RDLCK : F_WRLCK;
}
/****************************************************************************
 Debugging aid :-).
****************************************************************************/

static const char *posix_lock_type_name(int lock_type)
{
        return (lock_type == F_RDLCK) ? "READ" : "WRITE";
}
/****************************************************************************
 Check to see if the given unsigned lock range is within the possible POSIX
 range. Modifies the given args to be in range if possible, just returns
 False if not.
****************************************************************************/

#define SMB_OFF_T_BITS (sizeof(off_t)*8)
static bool posix_lock_in_range(off_t *offset_out, off_t *count_out,
                                uint64_t u_offset, uint64_t u_count)
{
        off_t offset = (off_t)u_offset;
        off_t count = (off_t)u_count;

        /*
         * For the type of system we are, attempt to
         * find the maximum positive lock offset as an off_t.
         */

#if defined(MAX_POSITIVE_LOCK_OFFSET) /* Some systems have arbitrary limits. */

        off_t max_positive_lock_offset = (MAX_POSITIVE_LOCK_OFFSET);
#else
        /*
         * In this case off_t is 64 bits,
         * and the underlying system can handle 64 bit signed locks.
         */

        off_t mask2 = ((off_t)0x4) << (SMB_OFF_T_BITS-4);
        off_t mask = (mask2<<1);
        off_t max_positive_lock_offset = ~mask;

#endif
        /*
         * POSIX locks of length zero mean lock to end-of-file.
         * Win32 locks of length zero are point probes. Ignore
         * any Win32 locks of length zero. JRA.
         */

        if (count == 0) {
                DEBUG(10,("posix_lock_in_range: count = 0, ignoring.\n"));
                return False;
        }

        /*
         * If the given offset was > max_positive_lock_offset then we cannot map this at all
         * ignore this lock.
         */

        if (u_offset & ~((uint64_t)max_positive_lock_offset)) {
                DEBUG(10, ("posix_lock_in_range: (offset = %ju) offset > %ju "
                           "and we cannot handle this. Ignoring lock.\n",
                           (uintmax_t)u_offset,
                           (uintmax_t)max_positive_lock_offset));
                return False;
        }

        /*
         * We must truncate the count to less than max_positive_lock_offset.
         */

        if (u_count & ~((uint64_t)max_positive_lock_offset)) {
                count = max_positive_lock_offset;
        }

        /*
         * Truncate count to end at max lock offset.
         */

        if (offset > INT64_MAX - count ||
            offset + count > max_positive_lock_offset) {
                count = max_positive_lock_offset - offset;
        }

        /*
         * If we ate all the count, ignore this lock.
         */

        if (count == 0) {
                DEBUG(10, ("posix_lock_in_range: Count = 0. Ignoring lock "
                           "u_offset = %ju, u_count = %ju\n",
                           (uintmax_t)u_offset,
                           (uintmax_t)u_count));
                return False;
        }

        /*
         * The mapping was successful.
         */

        DEBUG(10, ("posix_lock_in_range: offset_out = %ju, "
                   "count_out = %ju\n",
                   (uintmax_t)offset, (uintmax_t)count));

        *offset_out = offset;
        *count_out = count;

        return True;
}
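
/*
 * Worked example (illustrative, for the 64-bit off_t branch above where
 * max_positive_lock_offset == INT64_MAX): a request with
 * u_offset = 0x7FFFFFFFFFFFFFF0 and u_count = 0x100 passes the offset
 * check, but offset + count would exceed the maximum, so count is
 * clamped to max_positive_lock_offset - offset = 0xF and the function
 * returns True. A request whose offset itself has bits above the
 * maximum is rejected outright with False.
 */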
bool smb_vfs_call_lock(struct vfs_handle_struct *handle,
                       struct files_struct *fsp, int op, off_t offset,
                       off_t count, int type)
{
        VFS_FIND(lock);
        return handle->fns->lock_fn(handle, fsp, op, offset, count, type);
}
/****************************************************************************
 Actual function that does POSIX locks. Copes with 64 -> 32 bit cruft and
 broken NFS implementations.
****************************************************************************/

static bool posix_fcntl_lock(files_struct *fsp, int op, off_t offset, off_t count, int type)
{
        bool ret;

        DEBUG(8,("posix_fcntl_lock %d %d %jd %jd %d\n",
                 fsp_get_io_fd(fsp),op,(intmax_t)offset,(intmax_t)count,type));

        ret = SMB_VFS_LOCK(fsp, op, offset, count, type);

        if (!ret && ((errno == EFBIG) || (errno == ENOLCK) || (errno == EINVAL))) {

                if ((errno == EINVAL) &&
                    (op != F_GETLK &&
                     op != F_SETLK &&
                     op != F_SETLKW)) {
                        DEBUG(0,("WARNING: OFD locks in use and no kernel "
                                 "support. Try setting "
                                 "'smbd:force process locks = true' "
                                 "in smb.conf\n"));
                } else {
                        DEBUG(0, ("WARNING: lock request at offset "
                                  "%ju, length %ju returned\n",
                                  (uintmax_t)offset, (uintmax_t)count));
                        DEBUGADD(0, ("an %s error. This can happen when using 64 bit "
                                     "lock offsets\n", strerror(errno)));
                        DEBUGADD(0, ("on 32 bit NFS mounted file systems.\n"));
                }

                /*
                 * If the offset is > 0x7FFFFFFF then this will cause problems on
                 * 32 bit NFS mounted filesystems. Just ignore it.
                 */

                if (offset & ~((off_t)0x7fffffff)) {
                        DEBUG(0,("Offset greater than 31 bits. Returning success.\n"));
                        return True;
                }

                if (count & ~((off_t)0x7fffffff)) {
                        /* 32 bit NFS file system, retry with smaller offset */
                        DEBUG(0,("Count greater than 31 bits - retrying with 31 bit truncated length.\n"));
                        errno = 0;
                        count &= 0x7fffffff;
                        ret = SMB_VFS_LOCK(fsp, op, offset, count, type);
                }
        }

        DEBUG(8,("posix_fcntl_lock: Lock call %s\n", ret ? "successful" : "failed"));
        return ret;
}
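
/*
 * Summary of the fallback behaviour above: when the kernel rejects the
 * request with EFBIG, ENOLCK or EINVAL (typically a 32-bit NFS
 * limitation), an offset beyond 31 bits is simply reported as success
 * without a kernel-level lock, while a count beyond 31 bits is retried
 * once with the count truncated to 31 bits.
 */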
bool smb_vfs_call_getlock(struct vfs_handle_struct *handle,
                          struct files_struct *fsp, off_t *poffset,
                          off_t *pcount, int *ptype, pid_t *ppid)
{
        VFS_FIND(getlock);
        return handle->fns->getlock_fn(handle, fsp, poffset, pcount, ptype,
                                       ppid);
}
/****************************************************************************
 Actual function that gets POSIX locks. Copes with 64 -> 32 bit cruft and
 broken NFS implementations.
****************************************************************************/

static bool posix_fcntl_getlock(files_struct *fsp, off_t *poffset, off_t *pcount, int *ptype)
{
        pid_t pid;
        bool ret;

        DEBUG(8, ("posix_fcntl_getlock %d %ju %ju %d\n",
                  fsp_get_io_fd(fsp), (uintmax_t)*poffset, (uintmax_t)*pcount,
                  *ptype));

        ret = SMB_VFS_GETLOCK(fsp, poffset, pcount, ptype, &pid);

        if (!ret && ((errno == EFBIG) || (errno == ENOLCK) || (errno == EINVAL))) {

                DEBUG(0, ("posix_fcntl_getlock: WARNING: lock request at "
                          "offset %ju, length %ju returned\n",
                          (uintmax_t)*poffset, (uintmax_t)*pcount));
                DEBUGADD(0, ("an %s error. This can happen when using 64 bit "
                             "lock offsets\n", strerror(errno)));
                DEBUGADD(0, ("on 32 bit NFS mounted file systems.\n"));

                /*
                 * If the offset is > 0x7FFFFFFF then this will cause problems on
                 * 32 bit NFS mounted filesystems. Just ignore it.
                 */

                if (*poffset & ~((off_t)0x7fffffff)) {
                        DEBUG(0,("Offset greater than 31 bits. Returning success.\n"));
                        return True;
                }

                if (*pcount & ~((off_t)0x7fffffff)) {
                        /* 32 bit NFS file system, retry with smaller offset */
                        DEBUG(0,("Count greater than 31 bits - retrying with 31 bit truncated length.\n"));
                        errno = 0;
                        *pcount &= 0x7fffffff;
                        ret = SMB_VFS_GETLOCK(fsp,poffset,pcount,ptype,&pid);
                }
        }

        DEBUG(8,("posix_fcntl_getlock: Lock query call %s\n", ret ? "successful" : "failed"));
        return ret;
}
/****************************************************************************
 POSIX function to see if a file region is locked. Returns True if the
 region is locked, False otherwise.
****************************************************************************/

bool is_posix_locked(files_struct *fsp,
                     uint64_t *pu_offset,
                     uint64_t *pu_count,
                     enum brl_type *plock_type,
                     enum brl_flavour lock_flav)
{
        off_t offset;
        off_t count;
        int posix_lock_type = map_posix_lock_type(fsp,*plock_type);

        DEBUG(10, ("is_posix_locked: File %s, offset = %ju, count = %ju, "
                   "type = %s\n", fsp_str_dbg(fsp), (uintmax_t)*pu_offset,
                   (uintmax_t)*pu_count, posix_lock_type_name(*plock_type)));

        /*
         * If the requested lock won't fit in the POSIX range, we will
         * never set it, so presume it is not locked.
         */

        if(!posix_lock_in_range(&offset, &count, *pu_offset, *pu_count)) {
                return False;
        }

        if (!posix_fcntl_getlock(fsp,&offset,&count,&posix_lock_type)) {
                return False;
        }

        if (posix_lock_type == F_UNLCK) {
                return False;
        }

        if (lock_flav == POSIX_LOCK) {
                /* Only POSIX lock queries need to know the details. */
                *pu_offset = (uint64_t)offset;
                *pu_count = (uint64_t)count;
                *plock_type = (posix_lock_type == F_RDLCK) ? READ_LOCK : WRITE_LOCK;
        }
        return True;
}
/****************************************************************************
 Next - the functions that deal with in memory database storing representations
 of either Windows CIFS locks or POSIX CIFS locks.
****************************************************************************/

/* The key used in the in-memory POSIX databases. */

struct lock_ref_count_key {
        struct file_id id;
        char r;
};
/*******************************************************************
 Form a static locking key for a dev/inode pair for the lock ref count
******************************************************************/

static TDB_DATA locking_ref_count_key_fsp(const files_struct *fsp,
                                          struct lock_ref_count_key *tmp)
{
        ZERO_STRUCTP(tmp);
        tmp->id = fsp->file_id;
        tmp->r = 'r';
        return make_tdb_data((uint8_t *)tmp, sizeof(*tmp));
}
/*******************************************************************
 Convenience function to get an fd_array key from an fsp.
******************************************************************/

static TDB_DATA fd_array_key_fsp(const files_struct *fsp)
{
        return make_tdb_data((const uint8_t *)&fsp->file_id, sizeof(fsp->file_id));
}
/*******************************************************************
 Create the in-memory POSIX lock databases.
********************************************************************/

bool posix_locking_init(bool read_only)
{
        if (posix_pending_close_db != NULL) {
                return true;
        }

        posix_pending_close_db = db_open_rbt(NULL);

        if (posix_pending_close_db == NULL) {
                DEBUG(0,("Failed to open POSIX pending close database.\n"));
                return false;
        }

        return true;
}
/*******************************************************************
 Delete the in-memory POSIX lock databases.
********************************************************************/

bool posix_locking_end(void)
{
        /*
         * Shouldn't we close all fd's here?
         */
        TALLOC_FREE(posix_pending_close_db);
        return true;
}
/****************************************************************************
 Next - the functions that deal with reference count of number of locks open
 on a dev/ino pair.
****************************************************************************/

/****************************************************************************
 Increase the lock ref count. Creates lock_ref_count entry if it doesn't exist.
****************************************************************************/

static void increment_lock_ref_count(const files_struct *fsp)
{
        struct lock_ref_count_key tmp;
        int32_t lock_ref_count = 0;
        NTSTATUS status;

        status = dbwrap_change_int32_atomic(
                posix_pending_close_db, locking_ref_count_key_fsp(fsp, &tmp),
                &lock_ref_count, 1);

        SMB_ASSERT(NT_STATUS_IS_OK(status));
        SMB_ASSERT(lock_ref_count < INT32_MAX);

        DEBUG(10,("lock_ref_count for file %s = %d\n",
                  fsp_str_dbg(fsp), (int)(lock_ref_count + 1)));
}
/****************************************************************************
 Reduce the lock ref count.
****************************************************************************/

static void decrement_lock_ref_count(const files_struct *fsp)
{
        struct lock_ref_count_key tmp;
        int32_t lock_ref_count = 0;
        NTSTATUS status;

        status = dbwrap_change_int32_atomic(
                posix_pending_close_db, locking_ref_count_key_fsp(fsp, &tmp),
                &lock_ref_count, -1);

        SMB_ASSERT(NT_STATUS_IS_OK(status));
        SMB_ASSERT(lock_ref_count > 0);

        DEBUG(10,("lock_ref_count for file %s = %d\n",
                  fsp_str_dbg(fsp), (int)(lock_ref_count - 1)));
}
/****************************************************************************
 Fetch the lock ref count.
****************************************************************************/

static int32_t get_lock_ref_count(const files_struct *fsp)
{
        struct lock_ref_count_key tmp;
        NTSTATUS status;
        int32_t lock_ref_count = 0;

        status = dbwrap_fetch_int32(
                posix_pending_close_db, locking_ref_count_key_fsp(fsp, &tmp),
                &lock_ref_count);

        if (!NT_STATUS_IS_OK(status) &&
            !NT_STATUS_EQUAL(status, NT_STATUS_NOT_FOUND)) {
                DEBUG(0, ("Error fetching "
                          "lock ref count for file %s: %s\n",
                          fsp_str_dbg(fsp), nt_errstr(status)));
        }
        return lock_ref_count;
}
/****************************************************************************
 Delete a lock_ref_count entry.
****************************************************************************/

static void delete_lock_ref_count(const files_struct *fsp)
{
        struct lock_ref_count_key tmp;

        /* Not a bug if it doesn't exist - no locks were ever granted. */

        dbwrap_delete(posix_pending_close_db,
                      locking_ref_count_key_fsp(fsp, &tmp));

        DEBUG(10,("delete_lock_ref_count for file %s\n",
                  fsp_str_dbg(fsp)));
}
/****************************************************************************
 Next - the functions that deal with storing fd's that have outstanding
 POSIX locks when closed.
****************************************************************************/

/****************************************************************************
 The records in posix_pending_close_db are composed of an array of
 ints keyed by dev/ino pair. Those ints are the fd's that were open on
 this dev/ino pair that should have been closed, but can't as the lock
 ref count is non zero.
****************************************************************************/
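
/*
 * Sketch of a pending close record (illustrative):
 *
 *   key   : fsp->file_id (the dev/ino pair, see fd_array_key_fsp())
 *   value : fd[0] fd[1] ... fd[n-1]   (a packed array of ints)
 *
 * add_fd_to_close_entry_fn() appends one fd to the value;
 * fd_close_posix_fn() closes every fd in the array and deletes the record.
 */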
struct add_fd_to_close_entry_state {
        const struct files_struct *fsp;
};

static void add_fd_to_close_entry_fn(
        struct db_record *rec,
        TDB_DATA value,
        void *private_data)
{
        struct add_fd_to_close_entry_state *state = private_data;
        int fd = fsp_get_pathref_fd(state->fsp);
        TDB_DATA values[] = {
                value,
                { .dptr = (uint8_t *)&fd,
                  .dsize = sizeof(fd) },
        };
        NTSTATUS status;

        SMB_ASSERT((values[0].dsize % sizeof(int)) == 0);

        status = dbwrap_record_storev(rec, values, ARRAY_SIZE(values), 0);
        SMB_ASSERT(NT_STATUS_IS_OK(status));
}
/****************************************************************************
 Add an fd to the pending close db.
****************************************************************************/

static void add_fd_to_close_entry(const files_struct *fsp)
{
        struct add_fd_to_close_entry_state state = { .fsp = fsp };
        NTSTATUS status;

        status = dbwrap_do_locked(
                posix_pending_close_db,
                fd_array_key_fsp(fsp),
                add_fd_to_close_entry_fn,
                &state);
        SMB_ASSERT(NT_STATUS_IS_OK(status));

        DBG_DEBUG("added fd %d file %s\n",
                  fsp_get_pathref_fd(fsp),
                  fsp_str_dbg(fsp));
}
static void fd_close_posix_fn(
        struct db_record *rec,
        TDB_DATA data,
        void *private_data)
{
        size_t num_fds, i;

        SMB_ASSERT((data.dsize % sizeof(int)) == 0);
        num_fds = data.dsize / sizeof(int);

        for (i=0; i<num_fds; i++) {
                int fd;
                memcpy(&fd, data.dptr, sizeof(int));
                close(fd);
                data.dptr += sizeof(int);
        }
        dbwrap_record_delete(rec);
}
/****************************************************************************
 Deal with pending closes needed by POSIX locking support.
 Note that locking_close_file() is expected to have been called
 to delete all locks on this fsp before this function is called.
****************************************************************************/

int fd_close_posix(const struct files_struct *fsp)
{
        NTSTATUS status;

        if (!lp_locking(fsp->conn->params) ||
            !lp_posix_locking(fsp->conn->params) ||
            fsp->fsp_flags.use_ofd_locks)
        {
                /*
                 * No locking or POSIX to worry about or we are using POSIX
                 * open file description lock semantics which only removes
                 * locks on the file descriptor we're closing. Just close.
                 */
                return close(fsp_get_pathref_fd(fsp));
        }

        if (get_lock_ref_count(fsp)) {

                /*
                 * There are outstanding locks on this dev/inode pair on
                 * other fds. Add our fd to the pending close db. We also
                 * set fsp_get_io_fd(fsp) to -1 inside fd_close() after returning
                 * from VFS layer.
                 */

                add_fd_to_close_entry(fsp);
                return 0;
        }

        status = dbwrap_do_locked(
                posix_pending_close_db,
                fd_array_key_fsp(fsp),
                fd_close_posix_fn,
                NULL);
        if (!NT_STATUS_IS_OK(status)) {
                DBG_WARNING("dbwrap_do_locked failed: %s\n",
                            nt_errstr(status));
        }

        /* Don't need a lock ref count on this dev/ino anymore. */
        delete_lock_ref_count(fsp);

        /*
         * Finally close the fd associated with this fsp.
         */

        return close(fsp_get_pathref_fd(fsp));
}
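
/*
 * In short, fd_close_posix() takes one of three paths: close immediately
 * when POSIX locking is not in use (or OFD locks make deferral
 * unnecessary), defer the close by recording the fd when other fds still
 * hold locks on this dev/inode pair, or close any previously deferred fds,
 * drop the lock ref count record and then close this fd.
 */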
/****************************************************************************
 Next - the functions that deal with the mapping CIFS Windows locks onto
 the underlying system POSIX locks.
****************************************************************************/

/*
 * Structure used when splitting a lock range
 * into a POSIX lock range. Doubly linked list.
 */

struct lock_list {
        struct lock_list *next;
        struct lock_list *prev;
        off_t start;
        off_t size;
};
/****************************************************************************
 Create a list of lock ranges that don't overlap a given range. Used in calculating
 POSIX locks and unlocks. This is a difficult function that requires ASCII art to
 understand it :-).
****************************************************************************/

static struct lock_list *posix_lock_list(TALLOC_CTX *ctx,
                                         struct lock_list *lhead,
                                         const struct lock_context *lock_ctx, /* Lock context lhead belongs to. */
                                         const struct lock_struct *plocks,
                                         int num_locks)
{
        int i;

        /*
         * Check the current lock list on this dev/inode pair.
         * Quit if the list is deleted.
         */

        DEBUG(10, ("posix_lock_list: curr: start=%ju,size=%ju\n",
                   (uintmax_t)lhead->start, (uintmax_t)lhead->size ));

        for (i=0; i<num_locks && lhead; i++) {
                const struct lock_struct *lock = &plocks[i];
                struct lock_list *l_curr;

                /* Ignore all but read/write locks. */
                if (lock->lock_type != READ_LOCK && lock->lock_type != WRITE_LOCK) {
                        continue;
                }

                /* Ignore locks not owned by this process. */
                if (!server_id_equal(&lock->context.pid, &lock_ctx->pid)) {
                        continue;
                }

                /*
                 * Walk the lock list, checking for overlaps. Note that
                 * the lock list can expand within this loop if the current
                 * range being examined needs to be split.
                 */

                for (l_curr = lhead; l_curr;) {

                        DEBUG(10, ("posix_lock_list: lock: fnum=%ju: "
                                   "start=%ju,size=%ju:type=%s",
                                   (uintmax_t)lock->fnum,
                                   (uintmax_t)lock->start,
                                   (uintmax_t)lock->size,
                                   posix_lock_type_name(lock->lock_type) ));

                        if ( (l_curr->start >= (lock->start + lock->size)) ||
                             (lock->start >= (l_curr->start + l_curr->size))) {

                                /* No overlap with existing lock - leave this range alone. */
/*********************************************
                                             +---------+
                                             | l_curr  |
                                             +---------+
                                +-------+
                                | lock  |
                                +-------+
OR....
             +---------+
             |  l_curr |
             +---------+
**********************************************/

                                DEBUG(10,(" no overlap case.\n" ));

                                l_curr = l_curr->next;

                        } else if ( (l_curr->start >= lock->start) &&
                                    (l_curr->start + l_curr->size <= lock->start + lock->size) ) {

                                /*
                                 * This range is completely overlapped by this existing lock range
                                 * and thus should have no effect. Delete it from the list.
                                 */
/*********************************************
                +---------+
                | l_curr  |
                +---------+
        +---------------------------+
        |       lock                |
        +---------------------------+
**********************************************/
                                /* Save the next pointer */
                                struct lock_list *ul_next = l_curr->next;

                                DEBUG(10,(" delete case.\n" ));

                                DLIST_REMOVE(lhead, l_curr);
                                if(lhead == NULL) {
                                        break; /* No more list... */
                                }

                                l_curr = ul_next;

                        } else if ( (l_curr->start >= lock->start) &&
                                    (l_curr->start < lock->start + lock->size) &&
                                    (l_curr->start + l_curr->size > lock->start + lock->size) ) {

                                /*
                                 * This range overlaps the existing lock range at the high end.
                                 * Truncate by moving start to existing range end and reducing size.
                                 */
/*********************************************
                +---------------+
                |     l_curr    |
                +---------------+
        +---------------+
        |    lock       |
        +---------------+
BECOMES....
                        +-------+
                        | l_curr|
                        +-------+
**********************************************/

                                l_curr->size = (l_curr->start + l_curr->size) - (lock->start + lock->size);
                                l_curr->start = lock->start + lock->size;

                                DEBUG(10, (" truncate high case: start=%ju,"
                                           "size=%ju\n",
                                           (uintmax_t)l_curr->start,
                                           (uintmax_t)l_curr->size ));

                                l_curr = l_curr->next;

                        } else if ( (l_curr->start < lock->start) &&
                                    (l_curr->start + l_curr->size > lock->start) &&
                                    (l_curr->start + l_curr->size <= lock->start + lock->size) ) {

                                /*
                                 * This range overlaps the existing lock range at the low end.
                                 * Truncate by reducing size.
                                 */
/*********************************************
   +---------------+
   |     l_curr    |
   +---------------+
           +---------------+
           |    lock       |
           +---------------+
BECOMES....
   +-------+
   | l_curr|
   +-------+
**********************************************/

                                l_curr->size = lock->start - l_curr->start;

                                DEBUG(10, (" truncate low case: start=%ju,"
                                           "size=%ju\n",
                                           (uintmax_t)l_curr->start,
                                           (uintmax_t)l_curr->size ));

                                l_curr = l_curr->next;

                        } else if ( (l_curr->start < lock->start) &&
                                    (l_curr->start + l_curr->size > lock->start + lock->size) ) {

                                /*
                                 * Worst case scenario. Range completely overlaps an existing
                                 * lock range. Split the request into two, push the new (upper) request
                                 * into the dlink list, and continue with the entry after l_new (as we
                                 * know that l_new will not overlap with this lock).
                                 */
/*********************************************
        +---------------------------+
        |        l_curr             |
        +---------------------------+
                +---------+
                |  lock   |
                +---------+
BECOMES.....
        +-------+         +---------+
        | l_curr|         |  l_new  |
        +-------+         +---------+
**********************************************/
                                struct lock_list *l_new = talloc(ctx, struct lock_list);

                                if(l_new == NULL) {
                                        DEBUG(0,("posix_lock_list: talloc fail.\n"));
                                        return NULL; /* The talloc_destroy takes care of cleanup. */
                                }

                                ZERO_STRUCTP(l_new);
                                l_new->start = lock->start + lock->size;
                                l_new->size = l_curr->start + l_curr->size - l_new->start;

                                /* Truncate the l_curr. */
                                l_curr->size = lock->start - l_curr->start;

                                DEBUG(10, (" split case: curr: start=%ju,"
                                           "size=%ju new: start=%ju,"
                                           "size=%ju\n",
                                           (uintmax_t)l_curr->start,
                                           (uintmax_t)l_curr->size,
                                           (uintmax_t)l_new->start,
                                           (uintmax_t)l_new->size ));

                                /*
                                 * Add into the dlink list after the l_curr point - NOT at lhead.
                                 */

                                DLIST_ADD_AFTER(lhead, l_new, l_curr);

                                /* And move after the link we added. */
                                l_curr = l_new->next;

                        } else {

                                /*
                                 * This logic case should never happen. Ensure this is the
                                 * case by forcing an abort.... Remove in production.
                                 */
                                char *msg = NULL;

                                if (asprintf(&msg, "logic flaw in cases: "
                                             "l_curr: start = %ju, "
                                             "size = %ju : lock: "
                                             "start = %ju, size = %ju",
                                             (uintmax_t)l_curr->start,
                                             (uintmax_t)l_curr->size,
                                             (uintmax_t)lock->start,
                                             (uintmax_t)lock->size ) != -1) {
                                        smb_panic(msg);
                                } else {
                                        smb_panic("posix_lock_list");
                                }
                        }
                } /* end for ( l_curr = lhead; l_curr;) */
        } /* end for (i=0; i<num_locks && lhead; i++) */

        return lhead;
}
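
/*
 * Worked example (illustrative): if the initial list is a single range
 * start=0,size=100 and this process already holds a READ_LOCK at
 * start=40,size=20 on another fd, the "split" case applies: the existing
 * entry is truncated to start=0,size=40 and a new entry start=60,size=40
 * is inserted after it, so the returned ranges avoid the held lock.
 */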
/****************************************************************************
 POSIX function to acquire a lock. Returns True if the
 lock could be granted, False if not.
****************************************************************************/

bool set_posix_lock_windows_flavour(files_struct *fsp,
                                    uint64_t u_offset,
                                    uint64_t u_count,
                                    enum brl_type lock_type,
                                    const struct lock_context *lock_ctx,
                                    const struct lock_struct *plocks,
                                    int num_locks,
                                    int *errno_ret)
{
        off_t offset;
        off_t count;
        int posix_lock_type = map_posix_lock_type(fsp,lock_type);
        bool ret = True;
        size_t lock_count;
        TALLOC_CTX *l_ctx = NULL;
        struct lock_list *llist = NULL;
        struct lock_list *ll = NULL;

        DEBUG(5, ("set_posix_lock_windows_flavour: File %s, offset = %ju, "
                  "count = %ju, type = %s\n", fsp_str_dbg(fsp),
                  (uintmax_t)u_offset, (uintmax_t)u_count,
                  posix_lock_type_name(lock_type)));

        /*
         * If the requested lock won't fit in the POSIX range, we will
         * pretend it was successful.
         */

        if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
                increment_lock_ref_count(fsp);
                return True;
        }

        /*
         * Windows is very strange. It allows read locks to be overlayed
         * (even over a write lock), but leaves the write lock in force until the first
         * unlock. It also reference counts the locks. This means the following sequence :
         *
         * process1                                      process2
         * ------------------------------------------------------------------------
         * WRITE LOCK : start = 2, len = 10
         *                                               READ LOCK: start = 0, len = 10 - FAIL
         * READ LOCK : start = 0, len = 14
         *                                               READ LOCK: start = 0, len = 10 - FAIL
         * UNLOCK : start = 2, len = 10
         *                                               READ LOCK: start = 0, len = 10 - OK
         *
         * Under POSIX, the same sequence in steps 1 and 2 would not be reference counted, but
         * would leave a single read lock over the 0-14 region.
         */

        if ((l_ctx = talloc_init("set_posix_lock")) == NULL) {
                DEBUG(0,("set_posix_lock_windows_flavour: unable to init talloc context.\n"));
                return False;
        }

        if ((ll = talloc(l_ctx, struct lock_list)) == NULL) {
                DEBUG(0,("set_posix_lock_windows_flavour: unable to talloc unlock list.\n"));
                talloc_destroy(l_ctx);
                return False;
        }

        /*
         * Create the initial list entry containing the
         * lock we want to add.
         */

        ZERO_STRUCTP(ll);
        ll->start = offset;
        ll->size = count;

        DLIST_ADD(llist, ll);

        /*
         * The following call calculates if there are any
         * overlapping locks held by this process on
         * fd's open on the same file and splits this list
         * into a list of lock ranges that do not overlap with existing
         * POSIX locks.
         */

        llist = posix_lock_list(l_ctx,
                                llist,
                                lock_ctx, /* Lock context llist belongs to. */
                                plocks,
                                num_locks);

        /*
         * Add the POSIX locks on the list of ranges returned.
         * As the lock is supposed to be added atomically, we need to
         * back out all the locks if any one of these calls fail.
         */

        for (lock_count = 0, ll = llist; ll; ll = ll->next, lock_count++) {
                offset = ll->start;
                count = ll->size;

                DEBUG(5, ("set_posix_lock_windows_flavour: Real lock: "
                          "Type = %s: offset = %ju, count = %ju\n",
                          posix_lock_type_name(posix_lock_type),
                          (uintmax_t)offset, (uintmax_t)count ));

                if (!posix_fcntl_lock(fsp,F_SETLK,offset,count,posix_lock_type)) {
                        *errno_ret = errno;
                        DEBUG(5, ("set_posix_lock_windows_flavour: Lock "
                                  "fail !: Type = %s: offset = %ju, "
                                  "count = %ju. Errno = %s\n",
                                  posix_lock_type_name(posix_lock_type),
                                  (uintmax_t)offset, (uintmax_t)count,
                                  strerror(errno) ));
                        ret = False;
                        break;
                }
        }

        if (!ret) {

                /*
                 * Back out all the POSIX locks we have on fail.
                 */

                for (ll = llist; lock_count; ll = ll->next, lock_count--) {
                        offset = ll->start;
                        count = ll->size;

                        DEBUG(5, ("set_posix_lock_windows_flavour: Backing "
                                  "out locks: Type = %s: offset = %ju, "
                                  "count = %ju\n",
                                  posix_lock_type_name(posix_lock_type),
                                  (uintmax_t)offset, (uintmax_t)count ));

                        posix_fcntl_lock(fsp,F_SETLK,offset,count,F_UNLCK);
                }
        } else {
                /* Remember the number of locks we have on this dev/ino pair. */
                increment_lock_ref_count(fsp);
        }

        talloc_destroy(l_ctx);
        return ret;
}
/****************************************************************************
 POSIX function to release a lock. Returns True if the
 lock could be released, False if not.
****************************************************************************/

bool release_posix_lock_windows_flavour(files_struct *fsp,
                                        uint64_t u_offset,
                                        uint64_t u_count,
                                        enum brl_type deleted_lock_type,
                                        const struct lock_context *lock_ctx,
                                        const struct lock_struct *plocks,
                                        int num_locks)
{
        off_t offset;
        off_t count;
        bool ret = True;
        TALLOC_CTX *ul_ctx = NULL;
        struct lock_list *ulist = NULL;
        struct lock_list *ul = NULL;

        DEBUG(5, ("release_posix_lock_windows_flavour: File %s, offset = %ju, "
                  "count = %ju\n", fsp_str_dbg(fsp),
                  (uintmax_t)u_offset, (uintmax_t)u_count));

        /* Remember the number of locks we have on this dev/ino pair. */
        decrement_lock_ref_count(fsp);

        /*
         * If the requested lock won't fit in the POSIX range, we will
         * pretend it was successful.
         */

        if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
                return True;
        }

        if ((ul_ctx = talloc_init("release_posix_lock")) == NULL) {
                DEBUG(0,("release_posix_lock_windows_flavour: unable to init talloc context.\n"));
                return False;
        }

        if ((ul = talloc(ul_ctx, struct lock_list)) == NULL) {
                DEBUG(0,("release_posix_lock_windows_flavour: unable to talloc unlock list.\n"));
                talloc_destroy(ul_ctx);
                return False;
        }

        /*
         * Create the initial list entry containing the
         * lock we want to remove.
         */

        ZERO_STRUCTP(ul);
        ul->start = offset;
        ul->size = count;

        DLIST_ADD(ulist, ul);

        /*
         * The following call calculates if there are any
         * overlapping locks held by this process on
         * fd's open on the same file and creates a
         * list of unlock ranges that will allow
         * POSIX lock ranges to remain on the file whilst the
         * unlocks are performed.
         */

        ulist = posix_lock_list(ul_ctx,
                                ulist,
                                lock_ctx, /* Lock context ulist belongs to. */
                                plocks,
                                num_locks);

        /*
         * If there were any overlapped entries (list is > 1 or size or start have changed),
         * and the lock_type we just deleted from
         * the upper layer tdb was a write lock, then before doing the unlock we need to downgrade
         * the POSIX lock to a read lock. This allows any overlapping read locks
         * to be atomically maintained.
         */

        if (deleted_lock_type == WRITE_LOCK &&
            (!ulist || ulist->next != NULL || ulist->start != offset || ulist->size != count)) {

                DEBUG(5, ("release_posix_lock_windows_flavour: downgrading "
                          "lock to READ: offset = %ju, count = %ju\n",
                          (uintmax_t)offset, (uintmax_t)count ));

                if (!posix_fcntl_lock(fsp,F_SETLK,offset,count,F_RDLCK)) {
                        DEBUG(0,("release_posix_lock_windows_flavour: downgrade of lock failed with error %s !\n", strerror(errno) ));
                        talloc_destroy(ul_ctx);
                        return False;
                }
        }

        /*
         * Release the POSIX locks on the list of ranges returned.
         */

        for(; ulist; ulist = ulist->next) {
                offset = ulist->start;
                count = ulist->size;

                DEBUG(5, ("release_posix_lock_windows_flavour: Real unlock: "
                          "offset = %ju, count = %ju\n",
                          (uintmax_t)offset, (uintmax_t)count ));

                if (!posix_fcntl_lock(fsp,F_SETLK,offset,count,F_UNLCK)) {
                        ret = False;
                }
        }

        talloc_destroy(ul_ctx);
        return ret;
}
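
/*
 * Worked example (illustrative): if this process still holds a Windows
 * READ lock at start=0,len=14 and we release a WRITE lock at start=2,len=10,
 * posix_lock_list() deletes the whole unlock range (it is covered by the
 * remaining read lock), so the range is only downgraded to F_RDLCK and no
 * unlock is issued, leaving the overlapping read lock intact.
 */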
/****************************************************************************
 Next - the functions that deal with mapping CIFS POSIX locks onto
 the underlying system POSIX locks.
****************************************************************************/

/****************************************************************************
 We only increment the lock ref count when we see a POSIX lock on a context
 that doesn't already have them.
****************************************************************************/

static void increment_posix_lock_count(const files_struct *fsp,
                                       uint64_t smblctx)
{
        NTSTATUS status;
        TDB_DATA ctx_key;
        TDB_DATA val = { 0 };

        ctx_key.dptr = (uint8_t *)&smblctx;
        ctx_key.dsize = sizeof(smblctx);

        /*
         * Don't increment if we already have any POSIX flavor
         * locks on this context.
         */
        if (dbwrap_exists(posix_pending_close_db, ctx_key)) {
                return;
        }

        /* Remember that we have POSIX flavor locks on this context. */
        status = dbwrap_store(posix_pending_close_db, ctx_key, val, 0);
        SMB_ASSERT(NT_STATUS_IS_OK(status));

        increment_lock_ref_count(fsp);

        DEBUG(10,("posix_locks set for file %s\n",
                  fsp_str_dbg(fsp)));
}

static void decrement_posix_lock_count(const files_struct *fsp, uint64_t smblctx)
{
        NTSTATUS status;
        TDB_DATA ctx_key;

        ctx_key.dptr = (uint8_t *)&smblctx;
        ctx_key.dsize = sizeof(smblctx);

        status = dbwrap_delete(posix_pending_close_db, ctx_key);
        SMB_ASSERT(NT_STATUS_IS_OK(status));

        decrement_lock_ref_count(fsp);

        DEBUG(10,("posix_locks deleted for file %s\n",
                  fsp_str_dbg(fsp)));
}
/****************************************************************************
 Return true if any locks exist on the given lock context.
****************************************************************************/

static bool locks_exist_on_context(const struct lock_struct *plocks,
                                   int num_locks,
                                   const struct lock_context *lock_ctx)
{
        int i;

        for (i=0; i < num_locks; i++) {
                const struct lock_struct *lock = &plocks[i];

                /* Ignore all but read/write locks. */
                if (lock->lock_type != READ_LOCK && lock->lock_type != WRITE_LOCK) {
                        continue;
                }

                /* Ignore locks not owned by this process. */
                if (!server_id_equal(&lock->context.pid, &lock_ctx->pid)) {
                        continue;
                }

                if (lock_ctx->smblctx == lock->context.smblctx) {
                        return true;
                }
        }
        return false;
}
/****************************************************************************
 POSIX function to acquire a lock. Returns True if the
 lock could be granted, False if not.
 As POSIX locks don't stack or conflict (they just overwrite)
 we can map the requested lock directly onto a system one. We
 know it doesn't conflict with locks on other contexts as the
 upper layer would have refused it.
****************************************************************************/

bool set_posix_lock_posix_flavour(files_struct *fsp,
                                  uint64_t u_offset,
                                  uint64_t u_count,
                                  enum brl_type lock_type,
                                  const struct lock_context *lock_ctx,
                                  int *errno_ret)
{
        off_t offset;
        off_t count;
        int posix_lock_type = map_posix_lock_type(fsp,lock_type);

        DEBUG(5,("set_posix_lock_posix_flavour: File %s, offset = %ju, count "
                 "= %ju, type = %s\n", fsp_str_dbg(fsp),
                 (uintmax_t)u_offset, (uintmax_t)u_count,
                 posix_lock_type_name(lock_type)));

        /*
         * If the requested lock won't fit in the POSIX range, we will
         * pretend it was successful.
         */

        if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
                increment_posix_lock_count(fsp, lock_ctx->smblctx);
                return True;
        }

        if (!posix_fcntl_lock(fsp,F_SETLK,offset,count,posix_lock_type)) {
                *errno_ret = errno;
                DEBUG(5,("set_posix_lock_posix_flavour: Lock fail !: Type = %s: offset = %ju, count = %ju. Errno = %s\n",
                         posix_lock_type_name(posix_lock_type), (uintmax_t)offset, (uintmax_t)count, strerror(errno) ));
                return False;
        }
        increment_posix_lock_count(fsp, lock_ctx->smblctx);
        return True;
}
/****************************************************************************
 POSIX function to release a lock. Returns True if the
 lock could be released, False if not.
 We are given a complete lock state from the upper layer which is what the lock
 state should be after the unlock has already been done, so what
 we do is punch out holes in the unlock range where locks owned by this process
 have a different lock context.
****************************************************************************/

bool release_posix_lock_posix_flavour(files_struct *fsp,
                                      uint64_t u_offset,
                                      uint64_t u_count,
                                      const struct lock_context *lock_ctx,
                                      const struct lock_struct *plocks,
                                      int num_locks)
{
        bool ret = True;
        off_t offset;
        off_t count;
        TALLOC_CTX *ul_ctx = NULL;
        struct lock_list *ulist = NULL;
        struct lock_list *ul = NULL;

        DEBUG(5, ("release_posix_lock_posix_flavour: File %s, offset = %ju, "
                  "count = %ju\n", fsp_str_dbg(fsp),
                  (uintmax_t)u_offset, (uintmax_t)u_count));

        /*
         * If the requested lock won't fit in the POSIX range, we will
         * pretend it was successful.
         */

        if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
                if (!locks_exist_on_context(plocks, num_locks, lock_ctx)) {
                        decrement_posix_lock_count(fsp, lock_ctx->smblctx);
                }
                return True;
        }

        if ((ul_ctx = talloc_init("release_posix_lock")) == NULL) {
                DEBUG(0,("release_posix_lock_posix_flavour: unable to init talloc context.\n"));
                return False;
        }

        if ((ul = talloc(ul_ctx, struct lock_list)) == NULL) {
                DEBUG(0,("release_posix_lock_posix_flavour: unable to talloc unlock list.\n"));
                talloc_destroy(ul_ctx);
                return False;
        }

        /*
         * Create the initial list entry containing the
         * lock we want to remove.
         */

        ZERO_STRUCTP(ul);
        ul->start = offset;
        ul->size = count;

        DLIST_ADD(ulist, ul);

        /*
         * Walk the given array creating a linked list
         * of unlock requests.
         */

        ulist = posix_lock_list(ul_ctx,
                                ulist,
                                lock_ctx, /* Lock context ulist belongs to. */
                                plocks,
                                num_locks);

        /*
         * Release the POSIX locks on the list of ranges returned.
         */

        for(; ulist; ulist = ulist->next) {
                offset = ulist->start;
                count = ulist->size;

                DEBUG(5, ("release_posix_lock_posix_flavour: Real unlock: "
                          "offset = %ju, count = %ju\n",
                          (uintmax_t)offset, (uintmax_t)count ));

                if (!posix_fcntl_lock(fsp,F_SETLK,offset,count,F_UNLCK)) {
                        ret = False;
                }
        }

        if (!locks_exist_on_context(plocks, num_locks, lock_ctx)) {
                decrement_posix_lock_count(fsp, lock_ctx->smblctx);
        }
        talloc_destroy(ul_ctx);
        return ret;
}