s3: VFS: Change SMB_VFS_GET_QUOTA to use const struct smb_filename * instead of const...
[Samba.git] / source3 / locking / posix.c
blobff794282114af0186121f2d50c933495ec3fb51f
1 /*
2 Unix SMB/CIFS implementation.
3 Locking functions
4 Copyright (C) Jeremy Allison 1992-2006
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <http://www.gnu.org/licenses/>.
19 Revision History:
21 POSIX locking support. Jeremy Allison (jeremy@valinux.com), Apr. 2000.
24 #include "includes.h"
25 #include "system/filesys.h"
26 #include "lib/util/server_id.h"
27 #include "locking/proto.h"
28 #include "dbwrap/dbwrap.h"
29 #include "dbwrap/dbwrap_rbt.h"
30 #include "util_tdb.h"
32 #undef DBGC_CLASS
33 #define DBGC_CLASS DBGC_LOCKING
/*
 * The pending close database handle.
 *
 * Opened by posix_locking_init() and released by posix_locking_end().
 * It holds both the per-file lock reference counts and the per-file
 * arrays of file descriptors whose close() has been deferred.
 */

static struct db_context *posix_pending_close_db;
41 /****************************************************************************
42 First - the functions that deal with the underlying system locks - these
43 functions are used no matter if we're mapping CIFS Windows locks or CIFS
44 POSIX locks onto POSIX.
45 ****************************************************************************/
47 /****************************************************************************
48 Utility function to map a lock type correctly depending on the open
49 mode of a file.
50 ****************************************************************************/
52 static int map_posix_lock_type( files_struct *fsp, enum brl_type lock_type)
54 if((lock_type == WRITE_LOCK) && !fsp->can_write) {
56 * Many UNIX's cannot get a write lock on a file opened read-only.
57 * Win32 locking semantics allow this.
58 * Do the best we can and attempt a read-only lock.
60 DEBUG(10,("map_posix_lock_type: Downgrading write lock to read due to read-only file.\n"));
61 return F_RDLCK;
65 * This return should be the most normal, as we attempt
66 * to always open files read/write.
69 return (lock_type == READ_LOCK) ? F_RDLCK : F_WRLCK;
/****************************************************************************
 Debugging aid: printable name for an fcntl() lock type.
 Anything other than F_RDLCK is reported as "WRITE".
****************************************************************************/

static const char *posix_lock_type_name(int lock_type)
{
	if (lock_type == F_RDLCK) {
		return "READ";
	}
	return "WRITE";
}
81 /****************************************************************************
82 Check to see if the given unsigned lock range is within the possible POSIX
83 range. Modifies the given args to be in range if possible, just returns
84 False if not.
85 ****************************************************************************/
87 static bool posix_lock_in_range(off_t *offset_out, off_t *count_out,
88 uint64_t u_offset, uint64_t u_count)
90 off_t offset = (off_t)u_offset;
91 off_t count = (off_t)u_count;
94 * For the type of system we are, attempt to
95 * find the maximum positive lock offset as an off_t.
98 #if defined(MAX_POSITIVE_LOCK_OFFSET) /* Some systems have arbitrary limits. */
100 off_t max_positive_lock_offset = (MAX_POSITIVE_LOCK_OFFSET);
101 #else
103 * In this case off_t is 64 bits,
104 * and the underlying system can handle 64 bit signed locks.
107 off_t mask2 = ((off_t)0x4) << (SMB_OFF_T_BITS-4);
108 off_t mask = (mask2<<1);
109 off_t max_positive_lock_offset = ~mask;
111 #endif
113 * POSIX locks of length zero mean lock to end-of-file.
114 * Win32 locks of length zero are point probes. Ignore
115 * any Win32 locks of length zero. JRA.
118 if (count == 0) {
119 DEBUG(10,("posix_lock_in_range: count = 0, ignoring.\n"));
120 return False;
124 * If the given offset was > max_positive_lock_offset then we cannot map this at all
125 * ignore this lock.
128 if (u_offset & ~((uint64_t)max_positive_lock_offset)) {
129 DEBUG(10, ("posix_lock_in_range: (offset = %ju) offset > %ju "
130 "and we cannot handle this. Ignoring lock.\n",
131 (uintmax_t)u_offset,
132 (uintmax_t)max_positive_lock_offset));
133 return False;
137 * We must truncate the count to less than max_positive_lock_offset.
140 if (u_count & ~((uint64_t)max_positive_lock_offset)) {
141 count = max_positive_lock_offset;
145 * Truncate count to end at max lock offset.
148 if (offset + count < 0 || offset + count > max_positive_lock_offset) {
149 count = max_positive_lock_offset - offset;
153 * If we ate all the count, ignore this lock.
156 if (count == 0) {
157 DEBUG(10, ("posix_lock_in_range: Count = 0. Ignoring lock "
158 "u_offset = %ju, u_count = %ju\n",
159 (uintmax_t)u_offset,
160 (uintmax_t)u_count));
161 return False;
165 * The mapping was successful.
168 DEBUG(10, ("posix_lock_in_range: offset_out = %ju, "
169 "count_out = %ju\n",
170 (uintmax_t)offset, (uintmax_t)count));
172 *offset_out = offset;
173 *count_out = count;
175 return True;
/*
 * VFS dispatch for the "lock" operation: forwards the request unchanged
 * to the lock_fn of the selected handle and returns its result.
 */
bool smb_vfs_call_lock(struct vfs_handle_struct *handle,
		       struct files_struct *fsp, int op, off_t offset,
		       off_t count, int type)
{
	/*
	 * NOTE(review): VFS_FIND is defined elsewhere; presumably it
	 * advances 'handle' to a module that implements lock_fn - confirm.
	 */
	VFS_FIND(lock);
	return handle->fns->lock_fn(handle, fsp, op, offset, count, type);
}
186 /****************************************************************************
187 Actual function that does POSIX locks. Copes with 64 -> 32 bit cruft and
188 broken NFS implementations.
189 ****************************************************************************/
191 static bool posix_fcntl_lock(files_struct *fsp, int op, off_t offset, off_t count, int type)
193 bool ret;
195 DEBUG(8,("posix_fcntl_lock %d %d %jd %jd %d\n",
196 fsp->fh->fd,op,(intmax_t)offset,(intmax_t)count,type));
198 ret = SMB_VFS_LOCK(fsp, op, offset, count, type);
200 if (!ret && ((errno == EFBIG) || (errno == ENOLCK) || (errno == EINVAL))) {
202 if ((errno == EINVAL) &&
203 (op != F_GETLK &&
204 op != F_SETLK &&
205 op != F_SETLKW)) {
206 DEBUG(0,("WARNING: OFD locks in use and no kernel "
207 "support. Try setting "
208 "'smbd:force process locks = true' "
209 "in smb.conf\n"));
210 } else {
211 DEBUG(0, ("WARNING: lock request at offset "
212 "%ju, length %ju returned\n",
213 (uintmax_t)offset, (uintmax_t)count));
214 DEBUGADD(0, ("an %s error. This can happen when using 64 bit "
215 "lock offsets\n", strerror(errno)));
216 DEBUGADD(0, ("on 32 bit NFS mounted file systems.\n"));
220 * If the offset is > 0x7FFFFFFF then this will cause problems on
221 * 32 bit NFS mounted filesystems. Just ignore it.
224 if (offset & ~((off_t)0x7fffffff)) {
225 DEBUG(0,("Offset greater than 31 bits. Returning success.\n"));
226 return True;
229 if (count & ~((off_t)0x7fffffff)) {
230 /* 32 bit NFS file system, retry with smaller offset */
231 DEBUG(0,("Count greater than 31 bits - retrying with 31 bit truncated length.\n"));
232 errno = 0;
233 count &= 0x7fffffff;
234 ret = SMB_VFS_LOCK(fsp, op, offset, count, type);
238 DEBUG(8,("posix_fcntl_lock: Lock call %s\n", ret ? "successful" : "failed"));
239 return ret;
/*
 * VFS dispatch for the "getlock" operation: forwards the request
 * unchanged to the getlock_fn of the selected handle and returns its
 * result.
 */
bool smb_vfs_call_getlock(struct vfs_handle_struct *handle,
			  struct files_struct *fsp, off_t *poffset,
			  off_t *pcount, int *ptype, pid_t *ppid)
{
	/*
	 * NOTE(review): VFS_FIND is defined elsewhere; presumably it
	 * advances 'handle' to a module that implements getlock_fn - confirm.
	 */
	VFS_FIND(getlock);
	return handle->fns->getlock_fn(handle, fsp, poffset, pcount, ptype,
				       ppid);
}
251 /****************************************************************************
252 Actual function that gets POSIX locks. Copes with 64 -> 32 bit cruft and
253 broken NFS implementations.
254 ****************************************************************************/
256 static bool posix_fcntl_getlock(files_struct *fsp, off_t *poffset, off_t *pcount, int *ptype)
258 pid_t pid;
259 bool ret;
261 DEBUG(8, ("posix_fcntl_getlock %d %ju %ju %d\n",
262 fsp->fh->fd, (uintmax_t)*poffset, (uintmax_t)*pcount,
263 *ptype));
265 ret = SMB_VFS_GETLOCK(fsp, poffset, pcount, ptype, &pid);
267 if (!ret && ((errno == EFBIG) || (errno == ENOLCK) || (errno == EINVAL))) {
269 DEBUG(0, ("posix_fcntl_getlock: WARNING: lock request at "
270 "offset %ju, length %ju returned\n",
271 (uintmax_t)*poffset, (uintmax_t)*pcount));
272 DEBUGADD(0, ("an %s error. This can happen when using 64 bit "
273 "lock offsets\n", strerror(errno)));
274 DEBUGADD(0, ("on 32 bit NFS mounted file systems.\n"));
277 * If the offset is > 0x7FFFFFFF then this will cause problems on
278 * 32 bit NFS mounted filesystems. Just ignore it.
281 if (*poffset & ~((off_t)0x7fffffff)) {
282 DEBUG(0,("Offset greater than 31 bits. Returning success.\n"));
283 return True;
286 if (*pcount & ~((off_t)0x7fffffff)) {
287 /* 32 bit NFS file system, retry with smaller offset */
288 DEBUG(0,("Count greater than 31 bits - retrying with 31 bit truncated length.\n"));
289 errno = 0;
290 *pcount &= 0x7fffffff;
291 ret = SMB_VFS_GETLOCK(fsp,poffset,pcount,ptype,&pid);
295 DEBUG(8,("posix_fcntl_getlock: Lock query call %s\n", ret ? "successful" : "failed"));
296 return ret;
299 /****************************************************************************
300 POSIX function to see if a file region is locked. Returns True if the
301 region is locked, False otherwise.
302 ****************************************************************************/
304 bool is_posix_locked(files_struct *fsp,
305 uint64_t *pu_offset,
306 uint64_t *pu_count,
307 enum brl_type *plock_type,
308 enum brl_flavour lock_flav)
310 off_t offset;
311 off_t count;
312 int posix_lock_type = map_posix_lock_type(fsp,*plock_type);
314 DEBUG(10, ("is_posix_locked: File %s, offset = %ju, count = %ju, "
315 "type = %s\n", fsp_str_dbg(fsp), (uintmax_t)*pu_offset,
316 (uintmax_t)*pu_count, posix_lock_type_name(*plock_type)));
319 * If the requested lock won't fit in the POSIX range, we will
320 * never set it, so presume it is not locked.
323 if(!posix_lock_in_range(&offset, &count, *pu_offset, *pu_count)) {
324 return False;
327 if (!posix_fcntl_getlock(fsp,&offset,&count,&posix_lock_type)) {
328 return False;
331 if (posix_lock_type == F_UNLCK) {
332 return False;
335 if (lock_flav == POSIX_LOCK) {
336 /* Only POSIX lock queries need to know the details. */
337 *pu_offset = (uint64_t)offset;
338 *pu_count = (uint64_t)count;
339 *plock_type = (posix_lock_type == F_RDLCK) ? READ_LOCK : WRITE_LOCK;
341 return True;
344 /****************************************************************************
345 Next - the functions that deal with in memory database storing representations
346 of either Windows CIFS locks or POSIX CIFS locks.
347 ****************************************************************************/
/* The key used in the in-memory POSIX databases. */

struct lock_ref_count_key {
	struct file_id id;	/* copied from fsp->file_id */
	char r;			/* always 'r'; the extra byte keeps this key
				 * distinct from the bare file_id key used
				 * for the pending-close fd array */
};
356 /*******************************************************************
357 Form a static locking key for a dev/inode pair for the lock ref count
358 ******************************************************************/
360 static TDB_DATA locking_ref_count_key_fsp(const files_struct *fsp,
361 struct lock_ref_count_key *tmp)
363 ZERO_STRUCTP(tmp);
364 tmp->id = fsp->file_id;
365 tmp->r = 'r';
366 return make_tdb_data((uint8_t *)tmp, sizeof(*tmp));
369 /*******************************************************************
370 Convenience function to get an fd_array key from an fsp.
371 ******************************************************************/
373 static TDB_DATA fd_array_key_fsp(const files_struct *fsp)
375 return make_tdb_data((const uint8_t *)&fsp->file_id, sizeof(fsp->file_id));
378 /*******************************************************************
379 Create the in-memory POSIX lock databases.
380 ********************************************************************/
382 bool posix_locking_init(bool read_only)
384 if (posix_pending_close_db != NULL) {
385 return true;
388 posix_pending_close_db = db_open_rbt(NULL);
390 if (posix_pending_close_db == NULL) {
391 DEBUG(0,("Failed to open POSIX pending close database.\n"));
392 return false;
395 return true;
/*******************************************************************
 Delete the in-memory POSIX lock databases.

 Frees the pending close database handle. Always returns true.
********************************************************************/

bool posix_locking_end(void)
{
	/*
	 * Shouldn't we close all fd's here?
	 */
	TALLOC_FREE(posix_pending_close_db);
	return true;
}
411 /****************************************************************************
412 Next - the functions that deal with storing fd's that have outstanding
413 POSIX locks when closed.
414 ****************************************************************************/
416 /****************************************************************************
417 The records in posix_pending_close_db are composed of an array of
418 ints keyed by dev/ino pair. Those ints are the fd's that were open on
419 this dev/ino pair that should have been closed, but can't as the lock
420 ref count is non zero.
421 ****************************************************************************/
423 /****************************************************************************
424 Keep a reference count of the number of locks open on this dev/ino
425 pair. Creates entry if it doesn't exist.
426 ****************************************************************************/
428 static void increment_lock_ref_count(const files_struct *fsp)
430 struct lock_ref_count_key tmp;
431 int32_t lock_ref_count = 0;
432 NTSTATUS status;
434 status = dbwrap_change_int32_atomic(
435 posix_pending_close_db, locking_ref_count_key_fsp(fsp, &tmp),
436 &lock_ref_count, 1);
438 SMB_ASSERT(NT_STATUS_IS_OK(status));
439 SMB_ASSERT(lock_ref_count < INT32_MAX);
441 DEBUG(10,("lock_ref_count for file %s = %d\n",
442 fsp_str_dbg(fsp), (int)lock_ref_count));
445 static void decrement_lock_ref_count(const files_struct *fsp)
447 struct lock_ref_count_key tmp;
448 int32_t lock_ref_count = 0;
449 NTSTATUS status;
451 status = dbwrap_change_int32_atomic(
452 posix_pending_close_db, locking_ref_count_key_fsp(fsp, &tmp),
453 &lock_ref_count, -1);
455 SMB_ASSERT(NT_STATUS_IS_OK(status));
456 SMB_ASSERT(lock_ref_count >= 0);
458 DEBUG(10,("lock_ref_count for file %s = %d\n",
459 fsp_str_dbg(fsp), (int)lock_ref_count));
462 /****************************************************************************
463 Fetch the lock ref count.
464 ****************************************************************************/
466 static int32_t get_lock_ref_count(const files_struct *fsp)
468 struct lock_ref_count_key tmp;
469 NTSTATUS status;
470 int32_t lock_ref_count = 0;
472 status = dbwrap_fetch_int32(
473 posix_pending_close_db, locking_ref_count_key_fsp(fsp, &tmp),
474 &lock_ref_count);
476 if (!NT_STATUS_IS_OK(status) &&
477 !NT_STATUS_EQUAL(status, NT_STATUS_NOT_FOUND)) {
478 DEBUG(0, ("Error fetching "
479 "lock ref count for file %s: %s\n",
480 fsp_str_dbg(fsp), nt_errstr(status)));
482 return lock_ref_count;
485 /****************************************************************************
486 Delete a lock_ref_count entry.
487 ****************************************************************************/
489 static void delete_lock_ref_count(const files_struct *fsp)
491 struct lock_ref_count_key tmp;
493 /* Not a bug if it doesn't exist - no locks were ever granted. */
495 dbwrap_delete(posix_pending_close_db,
496 locking_ref_count_key_fsp(fsp, &tmp));
498 DEBUG(10,("delete_lock_ref_count for file %s\n",
499 fsp_str_dbg(fsp)));
502 /****************************************************************************
503 Add an fd to the pending close tdb.
504 ****************************************************************************/
506 static void add_fd_to_close_entry(const files_struct *fsp)
508 struct db_record *rec;
509 int *fds;
510 size_t num_fds;
511 NTSTATUS status;
512 TDB_DATA value;
514 rec = dbwrap_fetch_locked(
515 posix_pending_close_db, talloc_tos(),
516 fd_array_key_fsp(fsp));
518 SMB_ASSERT(rec != NULL);
520 value = dbwrap_record_get_value(rec);
521 SMB_ASSERT((value.dsize % sizeof(int)) == 0);
523 num_fds = value.dsize / sizeof(int);
524 fds = talloc_array(rec, int, num_fds+1);
526 SMB_ASSERT(fds != NULL);
528 memcpy(fds, value.dptr, value.dsize);
529 fds[num_fds] = fsp->fh->fd;
531 status = dbwrap_record_store(
532 rec, make_tdb_data((uint8_t *)fds, talloc_get_size(fds)), 0);
534 SMB_ASSERT(NT_STATUS_IS_OK(status));
536 TALLOC_FREE(rec);
538 DEBUG(10,("add_fd_to_close_entry: added fd %d file %s\n",
539 fsp->fh->fd, fsp_str_dbg(fsp)));
542 /****************************************************************************
543 Remove all fd entries for a specific dev/inode pair from the tdb.
544 ****************************************************************************/
546 static void delete_close_entries(const files_struct *fsp)
548 struct db_record *rec;
550 rec = dbwrap_fetch_locked(
551 posix_pending_close_db, talloc_tos(),
552 fd_array_key_fsp(fsp));
554 SMB_ASSERT(rec != NULL);
555 dbwrap_record_delete(rec);
556 TALLOC_FREE(rec);
559 /****************************************************************************
560 Get the array of POSIX pending close records for an open fsp. Returns number
561 of entries.
562 ****************************************************************************/
564 static size_t get_posix_pending_close_entries(TALLOC_CTX *mem_ctx,
565 const files_struct *fsp,
566 int **entries)
568 TDB_DATA dbuf;
569 NTSTATUS status;
571 status = dbwrap_fetch(
572 posix_pending_close_db, mem_ctx, fd_array_key_fsp(fsp),
573 &dbuf);
575 if (NT_STATUS_EQUAL(status, NT_STATUS_NOT_FOUND)) {
576 *entries = NULL;
577 return 0;
580 SMB_ASSERT(NT_STATUS_IS_OK(status));
582 if (dbuf.dsize == 0) {
583 *entries = NULL;
584 return 0;
587 *entries = (int *)dbuf.dptr;
588 return (size_t)(dbuf.dsize / sizeof(int));
591 /****************************************************************************
592 Deal with pending closes needed by POSIX locking support.
593 Note that posix_locking_close_file() is expected to have been called
594 to delete all locks on this fsp before this function is called.
595 ****************************************************************************/
597 int fd_close_posix(const struct files_struct *fsp)
599 int saved_errno = 0;
600 int ret;
601 int *fd_array = NULL;
602 size_t count, i;
604 if (!lp_locking(fsp->conn->params) ||
605 !lp_posix_locking(fsp->conn->params) ||
606 fsp->use_ofd_locks)
609 * No locking or POSIX to worry about or we are using POSIX
610 * open file description lock semantics which only removes
611 * locks on the file descriptor we're closing. Just close.
613 return close(fsp->fh->fd);
616 if (get_lock_ref_count(fsp)) {
619 * There are outstanding locks on this dev/inode pair on
620 * other fds. Add our fd to the pending close tdb and set
621 * fsp->fh->fd to -1.
624 add_fd_to_close_entry(fsp);
625 return 0;
629 * No outstanding locks. Get the pending close fd's
630 * from the tdb and close them all.
633 count = get_posix_pending_close_entries(talloc_tos(), fsp, &fd_array);
635 if (count) {
636 DEBUG(10,("fd_close_posix: doing close on %u fd's.\n",
637 (unsigned int)count));
639 for(i = 0; i < count; i++) {
640 if (close(fd_array[i]) == -1) {
641 saved_errno = errno;
646 * Delete all fd's stored in the tdb
647 * for this dev/inode pair.
650 delete_close_entries(fsp);
653 TALLOC_FREE(fd_array);
655 /* Don't need a lock ref count on this dev/ino anymore. */
656 delete_lock_ref_count(fsp);
659 * Finally close the fd associated with this fsp.
662 ret = close(fsp->fh->fd);
664 if (ret == 0 && saved_errno != 0) {
665 errno = saved_errno;
666 ret = -1;
669 return ret;
672 /****************************************************************************
673 Next - the functions that deal with the mapping CIFS Windows locks onto
674 the underlying system POSIX locks.
675 ****************************************************************************/
/*
 * Structure used when splitting a lock range
 * into a POSIX lock range. Doubly linked list.
 */

struct lock_list {
	struct lock_list *next;
	struct lock_list *prev;
	off_t start;	/* first byte of the range */
	off_t size;	/* length of the range */
};
689 /****************************************************************************
690 Create a list of lock ranges that don't overlap a given range. Used in calculating
691 POSIX locks and unlocks. This is a difficult function that requires ASCII art to
692 understand it :-).
693 ****************************************************************************/
695 static struct lock_list *posix_lock_list(TALLOC_CTX *ctx,
696 struct lock_list *lhead,
697 const struct lock_context *lock_ctx, /* Lock context lhead belongs to. */
698 const struct lock_struct *plocks,
699 int num_locks)
701 int i;
704 * Check the current lock list on this dev/inode pair.
705 * Quit if the list is deleted.
708 DEBUG(10, ("posix_lock_list: curr: start=%ju,size=%ju\n",
709 (uintmax_t)lhead->start, (uintmax_t)lhead->size ));
711 for (i=0; i<num_locks && lhead; i++) {
712 const struct lock_struct *lock = &plocks[i];
713 struct lock_list *l_curr;
715 /* Ignore all but read/write locks. */
716 if (lock->lock_type != READ_LOCK && lock->lock_type != WRITE_LOCK) {
717 continue;
720 /* Ignore locks not owned by this process. */
721 if (!serverid_equal(&lock->context.pid, &lock_ctx->pid)) {
722 continue;
726 * Walk the lock list, checking for overlaps. Note that
727 * the lock list can expand within this loop if the current
728 * range being examined needs to be split.
731 for (l_curr = lhead; l_curr;) {
733 DEBUG(10, ("posix_lock_list: lock: fnum=%ju: "
734 "start=%ju,size=%ju:type=%s",
735 (uintmax_t)lock->fnum,
736 (uintmax_t)lock->start,
737 (uintmax_t)lock->size,
738 posix_lock_type_name(lock->lock_type) ));
740 if ( (l_curr->start >= (lock->start + lock->size)) ||
741 (lock->start >= (l_curr->start + l_curr->size))) {
743 /* No overlap with existing lock - leave this range alone. */
744 /*********************************************
745 +---------+
746 | l_curr |
747 +---------+
748 +-------+
749 | lock |
750 +-------+
751 OR....
752 +---------+
753 | l_curr |
754 +---------+
755 **********************************************/
757 DEBUG(10,(" no overlap case.\n" ));
759 l_curr = l_curr->next;
761 } else if ( (l_curr->start >= lock->start) &&
762 (l_curr->start + l_curr->size <= lock->start + lock->size) ) {
765 * This range is completely overlapped by this existing lock range
766 * and thus should have no effect. Delete it from the list.
768 /*********************************************
769 +---------+
770 | l_curr |
771 +---------+
772 +---------------------------+
773 | lock |
774 +---------------------------+
775 **********************************************/
776 /* Save the next pointer */
777 struct lock_list *ul_next = l_curr->next;
779 DEBUG(10,(" delete case.\n" ));
781 DLIST_REMOVE(lhead, l_curr);
782 if(lhead == NULL) {
783 break; /* No more list... */
786 l_curr = ul_next;
788 } else if ( (l_curr->start >= lock->start) &&
789 (l_curr->start < lock->start + lock->size) &&
790 (l_curr->start + l_curr->size > lock->start + lock->size) ) {
793 * This range overlaps the existing lock range at the high end.
794 * Truncate by moving start to existing range end and reducing size.
796 /*********************************************
797 +---------------+
798 | l_curr |
799 +---------------+
800 +---------------+
801 | lock |
802 +---------------+
803 BECOMES....
804 +-------+
805 | l_curr|
806 +-------+
807 **********************************************/
809 l_curr->size = (l_curr->start + l_curr->size) - (lock->start + lock->size);
810 l_curr->start = lock->start + lock->size;
812 DEBUG(10, (" truncate high case: start=%ju,"
813 "size=%ju\n",
814 (uintmax_t)l_curr->start,
815 (uintmax_t)l_curr->size ));
817 l_curr = l_curr->next;
819 } else if ( (l_curr->start < lock->start) &&
820 (l_curr->start + l_curr->size > lock->start) &&
821 (l_curr->start + l_curr->size <= lock->start + lock->size) ) {
824 * This range overlaps the existing lock range at the low end.
825 * Truncate by reducing size.
827 /*********************************************
828 +---------------+
829 | l_curr |
830 +---------------+
831 +---------------+
832 | lock |
833 +---------------+
834 BECOMES....
835 +-------+
836 | l_curr|
837 +-------+
838 **********************************************/
840 l_curr->size = lock->start - l_curr->start;
842 DEBUG(10, (" truncate low case: start=%ju,"
843 "size=%ju\n",
844 (uintmax_t)l_curr->start,
845 (uintmax_t)l_curr->size ));
847 l_curr = l_curr->next;
849 } else if ( (l_curr->start < lock->start) &&
850 (l_curr->start + l_curr->size > lock->start + lock->size) ) {
852 * Worst case scenario. Range completely overlaps an existing
853 * lock range. Split the request into two, push the new (upper) request
854 * into the dlink list, and continue with the entry after l_new (as we
855 * know that l_new will not overlap with this lock).
857 /*********************************************
858 +---------------------------+
859 | l_curr |
860 +---------------------------+
861 +---------+
862 | lock |
863 +---------+
864 BECOMES.....
865 +-------+ +---------+
866 | l_curr| | l_new |
867 +-------+ +---------+
868 **********************************************/
869 struct lock_list *l_new = talloc(ctx, struct lock_list);
871 if(l_new == NULL) {
872 DEBUG(0,("posix_lock_list: talloc fail.\n"));
873 return NULL; /* The talloc_destroy takes care of cleanup. */
876 ZERO_STRUCTP(l_new);
877 l_new->start = lock->start + lock->size;
878 l_new->size = l_curr->start + l_curr->size - l_new->start;
880 /* Truncate the l_curr. */
881 l_curr->size = lock->start - l_curr->start;
883 DEBUG(10, (" split case: curr: start=%ju,"
884 "size=%ju new: start=%ju,"
885 "size=%ju\n",
886 (uintmax_t)l_curr->start,
887 (uintmax_t)l_curr->size,
888 (uintmax_t)l_new->start,
889 (uintmax_t)l_new->size ));
892 * Add into the dlink list after the l_curr point - NOT at lhead.
894 DLIST_ADD_AFTER(lhead, l_new, l_curr);
896 /* And move after the link we added. */
897 l_curr = l_new->next;
899 } else {
902 * This logic case should never happen. Ensure this is the
903 * case by forcing an abort.... Remove in production.
905 char *msg = NULL;
907 if (asprintf(&msg, "logic flaw in cases: "
908 "l_curr: start = %ju, "
909 "size = %ju : lock: "
910 "start = %ju, size = %ju",
911 (uintmax_t)l_curr->start,
912 (uintmax_t)l_curr->size,
913 (uintmax_t)lock->start,
914 (uintmax_t)lock->size ) != -1) {
915 smb_panic(msg);
916 } else {
917 smb_panic("posix_lock_list");
920 } /* end for ( l_curr = lhead; l_curr;) */
921 } /* end for (i=0; i<num_locks && ul_head; i++) */
923 return lhead;
926 /****************************************************************************
927 POSIX function to acquire a lock. Returns True if the
928 lock could be granted, False if not.
929 ****************************************************************************/
931 bool set_posix_lock_windows_flavour(files_struct *fsp,
932 uint64_t u_offset,
933 uint64_t u_count,
934 enum brl_type lock_type,
935 const struct lock_context *lock_ctx,
936 const struct lock_struct *plocks,
937 int num_locks,
938 int *errno_ret)
940 off_t offset;
941 off_t count;
942 int posix_lock_type = map_posix_lock_type(fsp,lock_type);
943 bool ret = True;
944 size_t lock_count;
945 TALLOC_CTX *l_ctx = NULL;
946 struct lock_list *llist = NULL;
947 struct lock_list *ll = NULL;
949 DEBUG(5, ("set_posix_lock_windows_flavour: File %s, offset = %ju, "
950 "count = %ju, type = %s\n", fsp_str_dbg(fsp),
951 (uintmax_t)u_offset, (uintmax_t)u_count,
952 posix_lock_type_name(lock_type)));
955 * If the requested lock won't fit in the POSIX range, we will
956 * pretend it was successful.
959 if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
960 increment_lock_ref_count(fsp);
961 return True;
965 * Windows is very strange. It allows read locks to be overlayed
966 * (even over a write lock), but leaves the write lock in force until the first
967 * unlock. It also reference counts the locks. This means the following sequence :
969 * process1 process2
970 * ------------------------------------------------------------------------
971 * WRITE LOCK : start = 2, len = 10
972 * READ LOCK: start =0, len = 10 - FAIL
973 * READ LOCK : start = 0, len = 14
974 * READ LOCK: start =0, len = 10 - FAIL
975 * UNLOCK : start = 2, len = 10
976 * READ LOCK: start =0, len = 10 - OK
978 * Under POSIX, the same sequence in steps 1 and 2 would not be reference counted, but
979 * would leave a single read lock over the 0-14 region.
982 if ((l_ctx = talloc_init("set_posix_lock")) == NULL) {
983 DEBUG(0,("set_posix_lock_windows_flavour: unable to init talloc context.\n"));
984 return False;
987 if ((ll = talloc(l_ctx, struct lock_list)) == NULL) {
988 DEBUG(0,("set_posix_lock_windows_flavour: unable to talloc unlock list.\n"));
989 talloc_destroy(l_ctx);
990 return False;
994 * Create the initial list entry containing the
995 * lock we want to add.
998 ZERO_STRUCTP(ll);
999 ll->start = offset;
1000 ll->size = count;
1002 DLIST_ADD(llist, ll);
1005 * The following call calculates if there are any
1006 * overlapping locks held by this process on
1007 * fd's open on the same file and splits this list
1008 * into a list of lock ranges that do not overlap with existing
1009 * POSIX locks.
1012 llist = posix_lock_list(l_ctx,
1013 llist,
1014 lock_ctx, /* Lock context llist belongs to. */
1015 plocks,
1016 num_locks);
1019 * Add the POSIX locks on the list of ranges returned.
1020 * As the lock is supposed to be added atomically, we need to
1021 * back out all the locks if any one of these calls fail.
1024 for (lock_count = 0, ll = llist; ll; ll = ll->next, lock_count++) {
1025 offset = ll->start;
1026 count = ll->size;
1028 DEBUG(5, ("set_posix_lock_windows_flavour: Real lock: "
1029 "Type = %s: offset = %ju, count = %ju\n",
1030 posix_lock_type_name(posix_lock_type),
1031 (uintmax_t)offset, (uintmax_t)count ));
1033 if (!posix_fcntl_lock(fsp,F_SETLK,offset,count,posix_lock_type)) {
1034 *errno_ret = errno;
1035 DEBUG(5, ("set_posix_lock_windows_flavour: Lock "
1036 "fail !: Type = %s: offset = %ju, "
1037 "count = %ju. Errno = %s\n",
1038 posix_lock_type_name(posix_lock_type),
1039 (uintmax_t)offset, (uintmax_t)count,
1040 strerror(errno) ));
1041 ret = False;
1042 break;
1046 if (!ret) {
1049 * Back out all the POSIX locks we have on fail.
1052 for (ll = llist; lock_count; ll = ll->next, lock_count--) {
1053 offset = ll->start;
1054 count = ll->size;
1056 DEBUG(5, ("set_posix_lock_windows_flavour: Backing "
1057 "out locks: Type = %s: offset = %ju, "
1058 "count = %ju\n",
1059 posix_lock_type_name(posix_lock_type),
1060 (uintmax_t)offset, (uintmax_t)count ));
1062 posix_fcntl_lock(fsp,F_SETLK,offset,count,F_UNLCK);
1064 } else {
1065 /* Remember the number of locks we have on this dev/ino pair. */
1066 increment_lock_ref_count(fsp);
1069 talloc_destroy(l_ctx);
1070 return ret;
1073 /****************************************************************************
1074 POSIX function to release a lock. Returns True if the
1075 lock could be released, False if not.
1076 ****************************************************************************/
1078 bool release_posix_lock_windows_flavour(files_struct *fsp,
1079 uint64_t u_offset,
1080 uint64_t u_count,
1081 enum brl_type deleted_lock_type,
1082 const struct lock_context *lock_ctx,
1083 const struct lock_struct *plocks,
1084 int num_locks)
1086 off_t offset;
1087 off_t count;
1088 bool ret = True;
1089 TALLOC_CTX *ul_ctx = NULL;
1090 struct lock_list *ulist = NULL;
1091 struct lock_list *ul = NULL;
1093 DEBUG(5, ("release_posix_lock_windows_flavour: File %s, offset = %ju, "
1094 "count = %ju\n", fsp_str_dbg(fsp),
1095 (uintmax_t)u_offset, (uintmax_t)u_count));
1097 /* Remember the number of locks we have on this dev/ino pair. */
1098 decrement_lock_ref_count(fsp);
1101 * If the requested lock won't fit in the POSIX range, we will
1102 * pretend it was successful.
1105 if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
1106 return True;
1109 if ((ul_ctx = talloc_init("release_posix_lock")) == NULL) {
1110 DEBUG(0,("release_posix_lock_windows_flavour: unable to init talloc context.\n"));
1111 return False;
1114 if ((ul = talloc(ul_ctx, struct lock_list)) == NULL) {
1115 DEBUG(0,("release_posix_lock_windows_flavour: unable to talloc unlock list.\n"));
1116 talloc_destroy(ul_ctx);
1117 return False;
1121 * Create the initial list entry containing the
1122 * lock we want to remove.
1125 ZERO_STRUCTP(ul);
1126 ul->start = offset;
1127 ul->size = count;
1129 DLIST_ADD(ulist, ul);
1132 * The following call calculates if there are any
1133 * overlapping locks held by this process on
1134 * fd's open on the same file and creates a
1135 * list of unlock ranges that will allow
1136 * POSIX lock ranges to remain on the file whilst the
1137 * unlocks are performed.
1140 ulist = posix_lock_list(ul_ctx,
1141 ulist,
1142 lock_ctx, /* Lock context ulist belongs to. */
1143 plocks,
1144 num_locks);
1147 * If there were any overlapped entries (list is > 1 or size or start have changed),
1148 * and the lock_type we just deleted from
1149 * the upper layer tdb was a write lock, then before doing the unlock we need to downgrade
1150 * the POSIX lock to a read lock. This allows any overlapping read locks
1151 * to be atomically maintained.
1154 if (deleted_lock_type == WRITE_LOCK &&
1155 (!ulist || ulist->next != NULL || ulist->start != offset || ulist->size != count)) {
1157 DEBUG(5, ("release_posix_lock_windows_flavour: downgrading "
1158 "lock to READ: offset = %ju, count = %ju\n",
1159 (uintmax_t)offset, (uintmax_t)count ));
1161 if (!posix_fcntl_lock(fsp,F_SETLK,offset,count,F_RDLCK)) {
1162 DEBUG(0,("release_posix_lock_windows_flavour: downgrade of lock failed with error %s !\n", strerror(errno) ));
1163 talloc_destroy(ul_ctx);
1164 return False;
1169 * Release the POSIX locks on the list of ranges returned.
1172 for(; ulist; ulist = ulist->next) {
1173 offset = ulist->start;
1174 count = ulist->size;
1176 DEBUG(5, ("release_posix_lock_windows_flavour: Real unlock: "
1177 "offset = %ju, count = %ju\n",
1178 (uintmax_t)offset, (uintmax_t)count ));
1180 if (!posix_fcntl_lock(fsp,F_SETLK,offset,count,F_UNLCK)) {
1181 ret = False;
1185 talloc_destroy(ul_ctx);
1186 return ret;
1189 /****************************************************************************
1190 Next - the functions that deal with mapping CIFS POSIX locks onto
1191 the underlying system POSIX locks.
1192 ****************************************************************************/
1194 /****************************************************************************
1195 We only increment the lock ref count when we see a POSIX lock on a context
1196 that doesn't already have them.
1197 ****************************************************************************/
1199 static void increment_posix_lock_count(const files_struct *fsp,
1200 uint64_t smblctx)
1202 NTSTATUS status;
1203 TDB_DATA ctx_key;
1204 TDB_DATA val = { 0 };
1206 ctx_key.dptr = (uint8_t *)&smblctx;
1207 ctx_key.dsize = sizeof(smblctx);
1210 * Don't increment if we already have any POSIX flavor
1211 * locks on this context.
1213 if (dbwrap_exists(posix_pending_close_db, ctx_key)) {
1214 return;
1217 /* Remember that we have POSIX flavor locks on this context. */
1218 status = dbwrap_store(posix_pending_close_db, ctx_key, val, 0);
1219 SMB_ASSERT(NT_STATUS_IS_OK(status));
1221 increment_lock_ref_count(fsp);
1223 DEBUG(10,("posix_locks set for file %s\n",
1224 fsp_str_dbg(fsp)));
1227 static void decrement_posix_lock_count(const files_struct *fsp, uint64_t smblctx)
1229 NTSTATUS status;
1230 TDB_DATA ctx_key;
1232 ctx_key.dptr = (uint8_t *)&smblctx;
1233 ctx_key.dsize = sizeof(smblctx);
1235 status = dbwrap_delete(posix_pending_close_db, ctx_key);
1236 SMB_ASSERT(NT_STATUS_IS_OK(status));
1238 decrement_lock_ref_count(fsp);
1240 DEBUG(10,("posix_locks deleted for file %s\n",
1241 fsp_str_dbg(fsp)));
1244 /****************************************************************************
1245 Return true if any locks exist on the given lock context.
1246 ****************************************************************************/
1248 static bool locks_exist_on_context(const struct lock_struct *plocks,
1249 int num_locks,
1250 const struct lock_context *lock_ctx)
1252 int i;
1254 for (i=0; i < num_locks; i++) {
1255 const struct lock_struct *lock = &plocks[i];
1257 /* Ignore all but read/write locks. */
1258 if (lock->lock_type != READ_LOCK && lock->lock_type != WRITE_LOCK) {
1259 continue;
1262 /* Ignore locks not owned by this process. */
1263 if (!serverid_equal(&lock->context.pid, &lock_ctx->pid)) {
1264 continue;
1267 if (lock_ctx->smblctx == lock->context.smblctx) {
1268 return true;
1271 return false;
1274 /****************************************************************************
1275 POSIX function to acquire a lock. Returns True if the
1276 lock could be granted, False if not.
1277 As POSIX locks don't stack or conflict (they just overwrite)
1278 we can map the requested lock directly onto a system one. We
1279 know it doesn't conflict with locks on other contexts as the
1280 upper layer would have refused it.
1281 ****************************************************************************/
1283 bool set_posix_lock_posix_flavour(files_struct *fsp,
1284 uint64_t u_offset,
1285 uint64_t u_count,
1286 enum brl_type lock_type,
1287 const struct lock_context *lock_ctx,
1288 int *errno_ret)
1290 off_t offset;
1291 off_t count;
1292 int posix_lock_type = map_posix_lock_type(fsp,lock_type);
1294 DEBUG(5,("set_posix_lock_posix_flavour: File %s, offset = %ju, count "
1295 "= %ju, type = %s\n", fsp_str_dbg(fsp),
1296 (uintmax_t)u_offset, (uintmax_t)u_count,
1297 posix_lock_type_name(lock_type)));
1300 * If the requested lock won't fit in the POSIX range, we will
1301 * pretend it was successful.
1304 if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
1305 increment_posix_lock_count(fsp, lock_ctx->smblctx);
1306 return True;
1309 if (!posix_fcntl_lock(fsp,F_SETLK,offset,count,posix_lock_type)) {
1310 *errno_ret = errno;
1311 DEBUG(5,("set_posix_lock_posix_flavour: Lock fail !: Type = %s: offset = %ju, count = %ju. Errno = %s\n",
1312 posix_lock_type_name(posix_lock_type), (intmax_t)offset, (intmax_t)count, strerror(errno) ));
1313 return False;
1315 increment_posix_lock_count(fsp, lock_ctx->smblctx);
1316 return True;
1319 /****************************************************************************
1320 POSIX function to release a lock. Returns True if the
1321 lock could be released, False if not.
1322 We are given a complete lock state from the upper layer which is what the lock
1323 state should be after the unlock has already been done, so what
1324 we do is punch out holes in the unlock range where locks owned by this process
1325 have a different lock context.
1326 ****************************************************************************/
1328 bool release_posix_lock_posix_flavour(files_struct *fsp,
1329 uint64_t u_offset,
1330 uint64_t u_count,
1331 const struct lock_context *lock_ctx,
1332 const struct lock_struct *plocks,
1333 int num_locks)
1335 bool ret = True;
1336 off_t offset;
1337 off_t count;
1338 TALLOC_CTX *ul_ctx = NULL;
1339 struct lock_list *ulist = NULL;
1340 struct lock_list *ul = NULL;
1342 DEBUG(5, ("release_posix_lock_posix_flavour: File %s, offset = %ju, "
1343 "count = %ju\n", fsp_str_dbg(fsp),
1344 (uintmax_t)u_offset, (uintmax_t)u_count));
1347 * If the requested lock won't fit in the POSIX range, we will
1348 * pretend it was successful.
1351 if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
1352 if (!locks_exist_on_context(plocks, num_locks, lock_ctx)) {
1353 decrement_posix_lock_count(fsp, lock_ctx->smblctx);
1355 return True;
1358 if ((ul_ctx = talloc_init("release_posix_lock")) == NULL) {
1359 DEBUG(0,("release_posix_lock_windows_flavour: unable to init talloc context.\n"));
1360 return False;
1363 if ((ul = talloc(ul_ctx, struct lock_list)) == NULL) {
1364 DEBUG(0,("release_posix_lock_windows_flavour: unable to talloc unlock list.\n"));
1365 talloc_destroy(ul_ctx);
1366 return False;
1370 * Create the initial list entry containing the
1371 * lock we want to remove.
1374 ZERO_STRUCTP(ul);
1375 ul->start = offset;
1376 ul->size = count;
1378 DLIST_ADD(ulist, ul);
1381 * Walk the given array creating a linked list
1382 * of unlock requests.
1385 ulist = posix_lock_list(ul_ctx,
1386 ulist,
1387 lock_ctx, /* Lock context ulist belongs to. */
1388 plocks,
1389 num_locks);
1392 * Release the POSIX locks on the list of ranges returned.
1395 for(; ulist; ulist = ulist->next) {
1396 offset = ulist->start;
1397 count = ulist->size;
1399 DEBUG(5, ("release_posix_lock_posix_flavour: Real unlock: "
1400 "offset = %ju, count = %ju\n",
1401 (uintmax_t)offset, (uintmax_t)count ));
1403 if (!posix_fcntl_lock(fsp,F_SETLK,offset,count,F_UNLCK)) {
1404 ret = False;
1408 if (!locks_exist_on_context(plocks, num_locks, lock_ctx)) {
1409 decrement_posix_lock_count(fsp, lock_ctx->smblctx);
1411 talloc_destroy(ul_ctx);
1412 return ret;