smbclient: Use cli_readlink
[Samba.git] / source3 / locking / posix.c
blob5aad427a218b19823ba5654a62e3e5db8b1b4b73
1 /*
2 Unix SMB/CIFS implementation.
3 Locking functions
4 Copyright (C) Jeremy Allison 1992-2006
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <http://www.gnu.org/licenses/>.
19 Revision History:
21 POSIX locking support. Jeremy Allison (jeremy@valinux.com), Apr. 2000.
24 #include "includes.h"
25 #include "system/filesys.h"
26 #include "lib/util/server_id.h"
27 #include "locking/proto.h"
28 #include "dbwrap/dbwrap.h"
29 #include "dbwrap/dbwrap_rbt.h"
30 #include "util_tdb.h"
31 #include "smbd/fd_handle.h"
33 #undef DBGC_CLASS
34 #define DBGC_CLASS DBGC_LOCKING
/*
 * The pending close database handle.
 *
 * NULL until posix_locking_init() opens the database, and reset by
 * posix_locking_end().
 */

static struct db_context *posix_pending_close_db;
42 /****************************************************************************
43 First - the functions that deal with the underlying system locks - these
44 functions are used no matter if we're mapping CIFS Windows locks or CIFS
45 POSIX locks onto POSIX.
46 ****************************************************************************/
48 /****************************************************************************
49 Utility function to map a lock type correctly depending on the open
50 mode of a file.
51 ****************************************************************************/
53 static int map_posix_lock_type( files_struct *fsp, enum brl_type lock_type)
55 if ((lock_type == WRITE_LOCK) && !fsp->fsp_flags.can_write) {
57 * Many UNIX's cannot get a write lock on a file opened read-only.
58 * Win32 locking semantics allow this.
59 * Do the best we can and attempt a read-only lock.
61 DEBUG(10,("map_posix_lock_type: Downgrading write lock to read due to read-only file.\n"));
62 return F_RDLCK;
66 * This return should be the most normal, as we attempt
67 * to always open files read/write.
70 return (lock_type == READ_LOCK) ? F_RDLCK : F_WRLCK;
/****************************************************************************
 Debugging aid :-).

 Takes a POSIX fcntl lock type and returns a static string for log
 messages: "READ" for F_RDLCK, "WRITE" for every other value.
****************************************************************************/

static const char *posix_lock_type_name(int lock_type)
{
	return (lock_type == F_RDLCK) ? "READ" : "WRITE";
}
82 /****************************************************************************
83 Check to see if the given unsigned lock range is within the possible POSIX
84 range. Modifies the given args to be in range if possible, just returns
85 False if not.
86 ****************************************************************************/
88 #define SMB_OFF_T_BITS (sizeof(off_t)*8)
90 static bool posix_lock_in_range(off_t *offset_out, off_t *count_out,
91 uint64_t u_offset, uint64_t u_count)
93 off_t offset = (off_t)u_offset;
94 off_t count = (off_t)u_count;
97 * For the type of system we are, attempt to
98 * find the maximum positive lock offset as an off_t.
101 #if defined(MAX_POSITIVE_LOCK_OFFSET) /* Some systems have arbitrary limits. */
103 off_t max_positive_lock_offset = (MAX_POSITIVE_LOCK_OFFSET);
104 #else
106 * In this case off_t is 64 bits,
107 * and the underlying system can handle 64 bit signed locks.
110 off_t mask2 = ((off_t)0x4) << (SMB_OFF_T_BITS-4);
111 off_t mask = (mask2<<1);
112 off_t max_positive_lock_offset = ~mask;
114 #endif
116 * POSIX locks of length zero mean lock to end-of-file.
117 * Win32 locks of length zero are point probes. Ignore
118 * any Win32 locks of length zero. JRA.
121 if (count == 0) {
122 DEBUG(10,("posix_lock_in_range: count = 0, ignoring.\n"));
123 return False;
127 * If the given offset was > max_positive_lock_offset then we cannot map this at all
128 * ignore this lock.
131 if (u_offset & ~((uint64_t)max_positive_lock_offset)) {
132 DEBUG(10, ("posix_lock_in_range: (offset = %ju) offset > %ju "
133 "and we cannot handle this. Ignoring lock.\n",
134 (uintmax_t)u_offset,
135 (uintmax_t)max_positive_lock_offset));
136 return False;
140 * We must truncate the count to less than max_positive_lock_offset.
143 if (u_count & ~((uint64_t)max_positive_lock_offset)) {
144 count = max_positive_lock_offset;
148 * Truncate count to end at max lock offset.
151 if (offset > INT64_MAX - count ||
152 offset + count > max_positive_lock_offset) {
153 count = max_positive_lock_offset - offset;
157 * If we ate all the count, ignore this lock.
160 if (count == 0) {
161 DEBUG(10, ("posix_lock_in_range: Count = 0. Ignoring lock "
162 "u_offset = %ju, u_count = %ju\n",
163 (uintmax_t)u_offset,
164 (uintmax_t)u_count));
165 return False;
169 * The mapping was successful.
172 DEBUG(10, ("posix_lock_in_range: offset_out = %ju, "
173 "count_out = %ju\n",
174 (uintmax_t)offset, (uintmax_t)count));
176 *offset_out = offset;
177 *count_out = count;
179 return True;
182 /****************************************************************************
183 Actual function that does POSIX locks. Copes with 64 -> 32 bit cruft and
184 broken NFS implementations.
185 ****************************************************************************/
187 static bool posix_fcntl_lock(files_struct *fsp, int op, off_t offset, off_t count, int type)
189 bool ret;
191 DEBUG(8,("posix_fcntl_lock %d %d %jd %jd %d\n",
192 fsp_get_io_fd(fsp),op,(intmax_t)offset,(intmax_t)count,type));
194 ret = SMB_VFS_LOCK(fsp, op, offset, count, type);
196 if (!ret && ((errno == EFBIG) || (errno == ENOLCK) || (errno == EINVAL))) {
198 if ((errno == EINVAL) &&
199 (op != F_GETLK &&
200 op != F_SETLK &&
201 op != F_SETLKW)) {
202 DEBUG(0,("WARNING: OFD locks in use and no kernel "
203 "support. Try setting "
204 "'smbd:force process locks = true' "
205 "in smb.conf\n"));
206 } else {
207 DEBUG(0, ("WARNING: lock request at offset "
208 "%ju, length %ju returned\n",
209 (uintmax_t)offset, (uintmax_t)count));
210 DEBUGADD(0, ("an %s error. This can happen when using 64 bit "
211 "lock offsets\n", strerror(errno)));
212 DEBUGADD(0, ("on 32 bit NFS mounted file systems.\n"));
216 * If the offset is > 0x7FFFFFFF then this will cause problems on
217 * 32 bit NFS mounted filesystems. Just ignore it.
220 if (offset & ~((off_t)0x7fffffff)) {
221 DEBUG(0,("Offset greater than 31 bits. Returning success.\n"));
222 return True;
225 if (count & ~((off_t)0x7fffffff)) {
226 /* 32 bit NFS file system, retry with smaller offset */
227 DEBUG(0,("Count greater than 31 bits - retrying with 31 bit truncated length.\n"));
228 errno = 0;
229 count &= 0x7fffffff;
230 ret = SMB_VFS_LOCK(fsp, op, offset, count, type);
234 DEBUG(8,("posix_fcntl_lock: Lock call %s\n", ret ? "successful" : "failed"));
235 return ret;
238 /****************************************************************************
239 Actual function that gets POSIX locks. Copes with 64 -> 32 bit cruft and
240 broken NFS implementations.
241 ****************************************************************************/
243 static bool posix_fcntl_getlock(files_struct *fsp, off_t *poffset, off_t *pcount, int *ptype)
245 pid_t pid;
246 bool ret;
248 DEBUG(8, ("posix_fcntl_getlock %d %ju %ju %d\n",
249 fsp_get_io_fd(fsp), (uintmax_t)*poffset, (uintmax_t)*pcount,
250 *ptype));
252 ret = SMB_VFS_GETLOCK(fsp, poffset, pcount, ptype, &pid);
254 if (!ret && ((errno == EFBIG) || (errno == ENOLCK) || (errno == EINVAL))) {
256 DEBUG(0, ("posix_fcntl_getlock: WARNING: lock request at "
257 "offset %ju, length %ju returned\n",
258 (uintmax_t)*poffset, (uintmax_t)*pcount));
259 DEBUGADD(0, ("an %s error. This can happen when using 64 bit "
260 "lock offsets\n", strerror(errno)));
261 DEBUGADD(0, ("on 32 bit NFS mounted file systems.\n"));
264 * If the offset is > 0x7FFFFFFF then this will cause problems on
265 * 32 bit NFS mounted filesystems. Just ignore it.
268 if (*poffset & ~((off_t)0x7fffffff)) {
269 DEBUG(0,("Offset greater than 31 bits. Returning success.\n"));
270 return True;
273 if (*pcount & ~((off_t)0x7fffffff)) {
274 /* 32 bit NFS file system, retry with smaller offset */
275 DEBUG(0,("Count greater than 31 bits - retrying with 31 bit truncated length.\n"));
276 errno = 0;
277 *pcount &= 0x7fffffff;
278 ret = SMB_VFS_GETLOCK(fsp,poffset,pcount,ptype,&pid);
282 DEBUG(8,("posix_fcntl_getlock: Lock query call %s\n", ret ? "successful" : "failed"));
283 return ret;
286 /****************************************************************************
287 POSIX function to see if a file region is locked. Returns True if the
288 region is locked, False otherwise.
289 ****************************************************************************/
291 bool is_posix_locked(files_struct *fsp,
292 uint64_t *pu_offset,
293 uint64_t *pu_count,
294 enum brl_type *plock_type,
295 enum brl_flavour lock_flav)
297 off_t offset;
298 off_t count;
299 int posix_lock_type = map_posix_lock_type(fsp,*plock_type);
301 DEBUG(10, ("is_posix_locked: File %s, offset = %ju, count = %ju, "
302 "type = %s\n", fsp_str_dbg(fsp), (uintmax_t)*pu_offset,
303 (uintmax_t)*pu_count, posix_lock_type_name(*plock_type)));
306 * If the requested lock won't fit in the POSIX range, we will
307 * never set it, so presume it is not locked.
310 if(!posix_lock_in_range(&offset, &count, *pu_offset, *pu_count)) {
311 return False;
314 if (!posix_fcntl_getlock(fsp,&offset,&count,&posix_lock_type)) {
315 return False;
318 if (posix_lock_type == F_UNLCK) {
319 return False;
322 if (lock_flav == POSIX_LOCK) {
323 /* Only POSIX lock queries need to know the details. */
324 *pu_offset = (uint64_t)offset;
325 *pu_count = (uint64_t)count;
326 *plock_type = (posix_lock_type == F_RDLCK) ? READ_LOCK : WRITE_LOCK;
328 return True;
331 /****************************************************************************
332 Next - the functions that deal with in memory database storing representations
333 of either Windows CIFS locks or POSIX CIFS locks.
334 ****************************************************************************/
336 /* The key used in the in-memory POSIX databases. */
338 struct lock_ref_count_key {
339 struct file_id id;
340 char r;
343 /*******************************************************************
344 Form a static locking key for a dev/inode pair for the lock ref count
345 ******************************************************************/
347 static TDB_DATA locking_ref_count_key_fsp(const files_struct *fsp,
348 struct lock_ref_count_key *tmp)
350 ZERO_STRUCTP(tmp);
351 tmp->id = fsp->file_id;
352 tmp->r = 'r';
353 return make_tdb_data((uint8_t *)tmp, sizeof(*tmp));
356 /*******************************************************************
357 Convenience function to get an fd_array key from an fsp.
358 ******************************************************************/
360 static TDB_DATA fd_array_key_fsp(const files_struct *fsp)
362 return make_tdb_data((const uint8_t *)&fsp->file_id, sizeof(fsp->file_id));
365 /*******************************************************************
366 Create the in-memory POSIX lock databases.
367 ********************************************************************/
369 bool posix_locking_init(bool read_only)
371 if (posix_pending_close_db != NULL) {
372 return true;
375 posix_pending_close_db = db_open_rbt(NULL);
377 if (posix_pending_close_db == NULL) {
378 DEBUG(0,("Failed to open POSIX pending close database.\n"));
379 return false;
382 return true;
385 /*******************************************************************
386 Delete the in-memory POSIX lock databases.
387 ********************************************************************/
389 bool posix_locking_end(void)
392 * Shouldn't we close all fd's here?
394 TALLOC_FREE(posix_pending_close_db);
395 return true;
398 /****************************************************************************
399 Next - the functions that deal with reference count of number of locks open
400 on a dev/ino pair.
401 ****************************************************************************/
403 /****************************************************************************
404 Increase the lock ref count. Creates lock_ref_count entry if it doesn't exist.
405 ****************************************************************************/
407 static void increment_lock_ref_count(const files_struct *fsp)
409 struct lock_ref_count_key tmp;
410 int32_t lock_ref_count = 0;
411 NTSTATUS status;
413 status = dbwrap_change_int32_atomic(
414 posix_pending_close_db, locking_ref_count_key_fsp(fsp, &tmp),
415 &lock_ref_count, 1);
417 SMB_ASSERT(NT_STATUS_IS_OK(status));
418 SMB_ASSERT(lock_ref_count < INT32_MAX);
420 DEBUG(10,("lock_ref_count for file %s = %d\n",
421 fsp_str_dbg(fsp), (int)(lock_ref_count + 1)));
424 /****************************************************************************
425 Reduce the lock ref count.
426 ****************************************************************************/
428 static void decrement_lock_ref_count(const files_struct *fsp)
430 struct lock_ref_count_key tmp;
431 int32_t lock_ref_count = 0;
432 NTSTATUS status;
434 status = dbwrap_change_int32_atomic(
435 posix_pending_close_db, locking_ref_count_key_fsp(fsp, &tmp),
436 &lock_ref_count, -1);
438 SMB_ASSERT(NT_STATUS_IS_OK(status));
439 SMB_ASSERT(lock_ref_count > 0);
441 DEBUG(10,("lock_ref_count for file %s = %d\n",
442 fsp_str_dbg(fsp), (int)(lock_ref_count - 1)));
445 /****************************************************************************
446 Fetch the lock ref count.
447 ****************************************************************************/
449 static int32_t get_lock_ref_count(const files_struct *fsp)
451 struct lock_ref_count_key tmp;
452 NTSTATUS status;
453 int32_t lock_ref_count = 0;
455 status = dbwrap_fetch_int32(
456 posix_pending_close_db, locking_ref_count_key_fsp(fsp, &tmp),
457 &lock_ref_count);
459 if (!NT_STATUS_IS_OK(status) &&
460 !NT_STATUS_EQUAL(status, NT_STATUS_NOT_FOUND)) {
461 DEBUG(0, ("Error fetching "
462 "lock ref count for file %s: %s\n",
463 fsp_str_dbg(fsp), nt_errstr(status)));
465 return lock_ref_count;
468 /****************************************************************************
469 Delete a lock_ref_count entry.
470 ****************************************************************************/
472 static void delete_lock_ref_count(const files_struct *fsp)
474 struct lock_ref_count_key tmp;
476 /* Not a bug if it doesn't exist - no locks were ever granted. */
478 dbwrap_delete(posix_pending_close_db,
479 locking_ref_count_key_fsp(fsp, &tmp));
481 DEBUG(10,("delete_lock_ref_count for file %s\n",
482 fsp_str_dbg(fsp)));
485 /****************************************************************************
486 Next - the functions that deal with storing fd's that have outstanding
487 POSIX locks when closed.
488 ****************************************************************************/
/****************************************************************************
 The records in posix_pending_close_db are composed of an array of
 ints keyed by dev/ino pair. Those ints are the fd's that were open on
 this dev/ino pair that should have been closed, but can't as the lock
 ref count is non zero.
****************************************************************************/

/* State handed to add_fd_to_close_entry_fn(): the fsp whose fd is queued. */
struct add_fd_to_close_entry_state {
	const struct files_struct *fsp;
};
501 static void add_fd_to_close_entry_fn(
502 struct db_record *rec,
503 TDB_DATA value,
504 void *private_data)
506 struct add_fd_to_close_entry_state *state = private_data;
507 int fd = fsp_get_pathref_fd(state->fsp);
508 TDB_DATA values[] = {
509 value,
510 { .dptr = (uint8_t *)&fd,
511 .dsize = sizeof(fd) },
513 NTSTATUS status;
515 SMB_ASSERT((values[0].dsize % sizeof(int)) == 0);
517 status = dbwrap_record_storev(rec, values, ARRAY_SIZE(values), 0);
518 SMB_ASSERT(NT_STATUS_IS_OK(status));
521 /****************************************************************************
522 Add an fd to the pending close db.
523 ****************************************************************************/
525 static void add_fd_to_close_entry(const files_struct *fsp)
527 struct add_fd_to_close_entry_state state = { .fsp = fsp };
528 NTSTATUS status;
530 status = dbwrap_do_locked(
531 posix_pending_close_db,
532 fd_array_key_fsp(fsp),
533 add_fd_to_close_entry_fn,
534 &state);
535 SMB_ASSERT(NT_STATUS_IS_OK(status));
537 DBG_DEBUG("added fd %d file %s\n",
538 fsp_get_pathref_fd(fsp),
539 fsp_str_dbg(fsp));
542 static void fd_close_posix_fn(
543 struct db_record *rec,
544 TDB_DATA data,
545 void *private_data)
547 size_t num_fds, i;
549 SMB_ASSERT((data.dsize % sizeof(int)) == 0);
550 num_fds = data.dsize / sizeof(int);
552 for (i=0; i<num_fds; i++) {
553 int fd;
554 memcpy(&fd, data.dptr, sizeof(int));
555 close(fd);
556 data.dptr += sizeof(int);
558 dbwrap_record_delete(rec);
561 /****************************************************************************
562 Deal with pending closes needed by POSIX locking support.
563 Note that locking_close_file() is expected to have been called
564 to delete all locks on this fsp before this function is called.
565 ****************************************************************************/
567 int fd_close_posix(const struct files_struct *fsp)
569 NTSTATUS status;
571 if (!lp_locking(fsp->conn->params) ||
572 !lp_posix_locking(fsp->conn->params) ||
573 fsp->fsp_flags.use_ofd_locks)
576 * No locking or POSIX to worry about or we are using POSIX
577 * open file description lock semantics which only removes
578 * locks on the file descriptor we're closing. Just close.
580 return close(fsp_get_pathref_fd(fsp));
583 if (get_lock_ref_count(fsp)) {
586 * There are outstanding locks on this dev/inode pair on
587 * other fds. Add our fd to the pending close db. We also
588 * set fsp_get_io_fd(fsp) to -1 inside fd_close() after returning
589 * from VFS layer.
592 add_fd_to_close_entry(fsp);
593 return 0;
596 status = dbwrap_do_locked(
597 posix_pending_close_db,
598 fd_array_key_fsp(fsp),
599 fd_close_posix_fn,
600 NULL);
601 if (!NT_STATUS_IS_OK(status)) {
602 DBG_WARNING("dbwrap_do_locked failed: %s\n",
603 nt_errstr(status));
606 /* Don't need a lock ref count on this dev/ino anymore. */
607 delete_lock_ref_count(fsp);
610 * Finally close the fd associated with this fsp.
613 return close(fsp_get_pathref_fd(fsp));
616 /****************************************************************************
617 Next - the functions that deal with the mapping CIFS Windows locks onto
618 the underlying system POSIX locks.
619 ****************************************************************************/
/*
 * Structure used when splitting a lock range
 * into a POSIX lock range. Doubly linked list.
 *
 * Each entry covers the byte range [start, start + size).
 */

struct lock_list {
	struct lock_list *next;
	struct lock_list *prev;
	off_t start;
	off_t size;
};
633 /****************************************************************************
634 Create a list of lock ranges that don't overlap a given range. Used in calculating
635 POSIX locks and unlocks. This is a difficult function that requires ASCII art to
636 understand it :-).
637 ****************************************************************************/
639 static struct lock_list *posix_lock_list(TALLOC_CTX *ctx,
640 struct lock_list *lhead,
641 const struct lock_context *lock_ctx, /* Lock context lhead belongs to. */
642 const struct lock_struct *plocks,
643 int num_locks)
645 int i;
648 * Check the current lock list on this dev/inode pair.
649 * Quit if the list is deleted.
652 DEBUG(10, ("posix_lock_list: curr: start=%ju,size=%ju\n",
653 (uintmax_t)lhead->start, (uintmax_t)lhead->size ));
655 for (i=0; i<num_locks && lhead; i++) {
656 const struct lock_struct *lock = &plocks[i];
657 struct lock_list *l_curr;
659 /* Ignore all but read/write locks. */
660 if (lock->lock_type != READ_LOCK && lock->lock_type != WRITE_LOCK) {
661 continue;
664 /* Ignore locks not owned by this process. */
665 if (!server_id_equal(&lock->context.pid, &lock_ctx->pid)) {
666 continue;
670 * Walk the lock list, checking for overlaps. Note that
671 * the lock list can expand within this loop if the current
672 * range being examined needs to be split.
675 for (l_curr = lhead; l_curr;) {
677 DEBUG(10, ("posix_lock_list: lock: fnum=%ju: "
678 "start=%ju,size=%ju:type=%s",
679 (uintmax_t)lock->fnum,
680 (uintmax_t)lock->start,
681 (uintmax_t)lock->size,
682 posix_lock_type_name(lock->lock_type) ));
684 if ( (l_curr->start >= (lock->start + lock->size)) ||
685 (lock->start >= (l_curr->start + l_curr->size))) {
687 /* No overlap with existing lock - leave this range alone. */
688 /*********************************************
689 +---------+
690 | l_curr |
691 +---------+
692 +-------+
693 | lock |
694 +-------+
695 OR....
696 +---------+
697 | l_curr |
698 +---------+
699 **********************************************/
701 DEBUG(10,(" no overlap case.\n" ));
703 l_curr = l_curr->next;
705 } else if ( (l_curr->start >= lock->start) &&
706 (l_curr->start + l_curr->size <= lock->start + lock->size) ) {
709 * This range is completely overlapped by this existing lock range
710 * and thus should have no effect. Delete it from the list.
712 /*********************************************
713 +---------+
714 | l_curr |
715 +---------+
716 +---------------------------+
717 | lock |
718 +---------------------------+
719 **********************************************/
720 /* Save the next pointer */
721 struct lock_list *ul_next = l_curr->next;
723 DEBUG(10,(" delete case.\n" ));
725 DLIST_REMOVE(lhead, l_curr);
726 if(lhead == NULL) {
727 break; /* No more list... */
730 l_curr = ul_next;
732 } else if ( (l_curr->start >= lock->start) &&
733 (l_curr->start < lock->start + lock->size) &&
734 (l_curr->start + l_curr->size > lock->start + lock->size) ) {
737 * This range overlaps the existing lock range at the high end.
738 * Truncate by moving start to existing range end and reducing size.
740 /*********************************************
741 +---------------+
742 | l_curr |
743 +---------------+
744 +---------------+
745 | lock |
746 +---------------+
747 BECOMES....
748 +-------+
749 | l_curr|
750 +-------+
751 **********************************************/
753 l_curr->size = (l_curr->start + l_curr->size) - (lock->start + lock->size);
754 l_curr->start = lock->start + lock->size;
756 DEBUG(10, (" truncate high case: start=%ju,"
757 "size=%ju\n",
758 (uintmax_t)l_curr->start,
759 (uintmax_t)l_curr->size ));
761 l_curr = l_curr->next;
763 } else if ( (l_curr->start < lock->start) &&
764 (l_curr->start + l_curr->size > lock->start) &&
765 (l_curr->start + l_curr->size <= lock->start + lock->size) ) {
768 * This range overlaps the existing lock range at the low end.
769 * Truncate by reducing size.
771 /*********************************************
772 +---------------+
773 | l_curr |
774 +---------------+
775 +---------------+
776 | lock |
777 +---------------+
778 BECOMES....
779 +-------+
780 | l_curr|
781 +-------+
782 **********************************************/
784 l_curr->size = lock->start - l_curr->start;
786 DEBUG(10, (" truncate low case: start=%ju,"
787 "size=%ju\n",
788 (uintmax_t)l_curr->start,
789 (uintmax_t)l_curr->size ));
791 l_curr = l_curr->next;
793 } else if ( (l_curr->start < lock->start) &&
794 (l_curr->start + l_curr->size > lock->start + lock->size) ) {
796 * Worst case scenario. Range completely overlaps an existing
797 * lock range. Split the request into two, push the new (upper) request
798 * into the dlink list, and continue with the entry after l_new (as we
799 * know that l_new will not overlap with this lock).
801 /*********************************************
802 +---------------------------+
803 | l_curr |
804 +---------------------------+
805 +---------+
806 | lock |
807 +---------+
808 BECOMES.....
809 +-------+ +---------+
810 | l_curr| | l_new |
811 +-------+ +---------+
812 **********************************************/
813 struct lock_list *l_new = talloc(ctx, struct lock_list);
815 if(l_new == NULL) {
816 DEBUG(0,("posix_lock_list: talloc fail.\n"));
817 return NULL; /* The talloc_destroy takes care of cleanup. */
820 ZERO_STRUCTP(l_new);
821 l_new->start = lock->start + lock->size;
822 l_new->size = l_curr->start + l_curr->size - l_new->start;
824 /* Truncate the l_curr. */
825 l_curr->size = lock->start - l_curr->start;
827 DEBUG(10, (" split case: curr: start=%ju,"
828 "size=%ju new: start=%ju,"
829 "size=%ju\n",
830 (uintmax_t)l_curr->start,
831 (uintmax_t)l_curr->size,
832 (uintmax_t)l_new->start,
833 (uintmax_t)l_new->size ));
836 * Add into the dlink list after the l_curr point - NOT at lhead.
838 DLIST_ADD_AFTER(lhead, l_new, l_curr);
840 /* And move after the link we added. */
841 l_curr = l_new->next;
843 } else {
846 * This logic case should never happen. Ensure this is the
847 * case by forcing an abort.... Remove in production.
849 char *msg = NULL;
851 if (asprintf(&msg, "logic flaw in cases: "
852 "l_curr: start = %ju, "
853 "size = %ju : lock: "
854 "start = %ju, size = %ju",
855 (uintmax_t)l_curr->start,
856 (uintmax_t)l_curr->size,
857 (uintmax_t)lock->start,
858 (uintmax_t)lock->size ) != -1) {
859 smb_panic(msg);
860 } else {
861 smb_panic("posix_lock_list");
864 } /* end for ( l_curr = lhead; l_curr;) */
865 } /* end for (i=0; i<num_locks && ul_head; i++) */
867 return lhead;
870 /****************************************************************************
871 POSIX function to acquire a lock. Returns True if the
872 lock could be granted, False if not.
873 ****************************************************************************/
875 bool set_posix_lock_windows_flavour(files_struct *fsp,
876 uint64_t u_offset,
877 uint64_t u_count,
878 enum brl_type lock_type,
879 const struct lock_context *lock_ctx,
880 const struct lock_struct *plocks,
881 int num_locks,
882 int *errno_ret)
884 off_t offset;
885 off_t count;
886 int posix_lock_type = map_posix_lock_type(fsp,lock_type);
887 bool ret = True;
888 size_t lock_count;
889 TALLOC_CTX *l_ctx = NULL;
890 struct lock_list *llist = NULL;
891 struct lock_list *ll = NULL;
893 DEBUG(5, ("set_posix_lock_windows_flavour: File %s, offset = %ju, "
894 "count = %ju, type = %s\n", fsp_str_dbg(fsp),
895 (uintmax_t)u_offset, (uintmax_t)u_count,
896 posix_lock_type_name(lock_type)));
899 * If the requested lock won't fit in the POSIX range, we will
900 * pretend it was successful.
903 if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
904 increment_lock_ref_count(fsp);
905 return True;
909 * Windows is very strange. It allows read locks to be overlayed
910 * (even over a write lock), but leaves the write lock in force until the first
911 * unlock. It also reference counts the locks. This means the following sequence :
913 * process1 process2
914 * ------------------------------------------------------------------------
915 * WRITE LOCK : start = 2, len = 10
916 * READ LOCK: start =0, len = 10 - FAIL
917 * READ LOCK : start = 0, len = 14
918 * READ LOCK: start =0, len = 10 - FAIL
919 * UNLOCK : start = 2, len = 10
920 * READ LOCK: start =0, len = 10 - OK
922 * Under POSIX, the same sequence in steps 1 and 2 would not be reference counted, but
923 * would leave a single read lock over the 0-14 region.
926 if ((l_ctx = talloc_init("set_posix_lock")) == NULL) {
927 DEBUG(0,("set_posix_lock_windows_flavour: unable to init talloc context.\n"));
928 return False;
931 if ((ll = talloc(l_ctx, struct lock_list)) == NULL) {
932 DEBUG(0,("set_posix_lock_windows_flavour: unable to talloc unlock list.\n"));
933 talloc_destroy(l_ctx);
934 return False;
938 * Create the initial list entry containing the
939 * lock we want to add.
942 ZERO_STRUCTP(ll);
943 ll->start = offset;
944 ll->size = count;
946 DLIST_ADD(llist, ll);
949 * The following call calculates if there are any
950 * overlapping locks held by this process on
951 * fd's open on the same file and splits this list
952 * into a list of lock ranges that do not overlap with existing
953 * POSIX locks.
956 llist = posix_lock_list(l_ctx,
957 llist,
958 lock_ctx, /* Lock context llist belongs to. */
959 plocks,
960 num_locks);
963 * Add the POSIX locks on the list of ranges returned.
964 * As the lock is supposed to be added atomically, we need to
965 * back out all the locks if any one of these calls fail.
968 for (lock_count = 0, ll = llist; ll; ll = ll->next, lock_count++) {
969 offset = ll->start;
970 count = ll->size;
972 DEBUG(5, ("set_posix_lock_windows_flavour: Real lock: "
973 "Type = %s: offset = %ju, count = %ju\n",
974 posix_lock_type_name(posix_lock_type),
975 (uintmax_t)offset, (uintmax_t)count ));
977 if (!posix_fcntl_lock(fsp,F_SETLK,offset,count,posix_lock_type)) {
978 *errno_ret = errno;
979 DEBUG(5, ("set_posix_lock_windows_flavour: Lock "
980 "fail !: Type = %s: offset = %ju, "
981 "count = %ju. Errno = %s\n",
982 posix_lock_type_name(posix_lock_type),
983 (uintmax_t)offset, (uintmax_t)count,
984 strerror(errno) ));
985 ret = False;
986 break;
990 if (!ret) {
993 * Back out all the POSIX locks we have on fail.
996 for (ll = llist; lock_count; ll = ll->next, lock_count--) {
997 offset = ll->start;
998 count = ll->size;
1000 DEBUG(5, ("set_posix_lock_windows_flavour: Backing "
1001 "out locks: Type = %s: offset = %ju, "
1002 "count = %ju\n",
1003 posix_lock_type_name(posix_lock_type),
1004 (uintmax_t)offset, (uintmax_t)count ));
1006 posix_fcntl_lock(fsp,F_SETLK,offset,count,F_UNLCK);
1008 } else {
1009 /* Remember the number of locks we have on this dev/ino pair. */
1010 increment_lock_ref_count(fsp);
1013 talloc_destroy(l_ctx);
1014 return ret;
1017 /****************************************************************************
1018 POSIX function to release a lock. Returns True if the
1019 lock could be released, False if not.
1020 ****************************************************************************/
1022 bool release_posix_lock_windows_flavour(files_struct *fsp,
1023 uint64_t u_offset,
1024 uint64_t u_count,
1025 enum brl_type deleted_lock_type,
1026 const struct lock_context *lock_ctx,
1027 const struct lock_struct *plocks,
1028 int num_locks)
1030 off_t offset;
1031 off_t count;
1032 bool ret = True;
1033 TALLOC_CTX *ul_ctx = NULL;
1034 struct lock_list *ulist = NULL;
1035 struct lock_list *ul = NULL;
1037 DEBUG(5, ("release_posix_lock_windows_flavour: File %s, offset = %ju, "
1038 "count = %ju\n", fsp_str_dbg(fsp),
1039 (uintmax_t)u_offset, (uintmax_t)u_count));
1041 /* Remember the number of locks we have on this dev/ino pair. */
1042 decrement_lock_ref_count(fsp);
1045 * If the requested lock won't fit in the POSIX range, we will
1046 * pretend it was successful.
1049 if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
1050 return True;
1053 if ((ul_ctx = talloc_init("release_posix_lock")) == NULL) {
1054 DEBUG(0,("release_posix_lock_windows_flavour: unable to init talloc context.\n"));
1055 return False;
1058 if ((ul = talloc(ul_ctx, struct lock_list)) == NULL) {
1059 DEBUG(0,("release_posix_lock_windows_flavour: unable to talloc unlock list.\n"));
1060 talloc_destroy(ul_ctx);
1061 return False;
1065 * Create the initial list entry containing the
1066 * lock we want to remove.
1069 ZERO_STRUCTP(ul);
1070 ul->start = offset;
1071 ul->size = count;
1073 DLIST_ADD(ulist, ul);
1076 * The following call calculates if there are any
1077 * overlapping locks held by this process on
1078 * fd's open on the same file and creates a
1079 * list of unlock ranges that will allow
1080 * POSIX lock ranges to remain on the file whilst the
1081 * unlocks are performed.
1084 ulist = posix_lock_list(ul_ctx,
1085 ulist,
1086 lock_ctx, /* Lock context ulist belongs to. */
1087 plocks,
1088 num_locks);
1091 * If there were any overlapped entries (list is > 1 or size or start have changed),
1092 * and the lock_type we just deleted from
1093 * the upper layer tdb was a write lock, then before doing the unlock we need to downgrade
1094 * the POSIX lock to a read lock. This allows any overlapping read locks
1095 * to be atomically maintained.
1098 if (deleted_lock_type == WRITE_LOCK &&
1099 (!ulist || ulist->next != NULL || ulist->start != offset || ulist->size != count)) {
1101 DEBUG(5, ("release_posix_lock_windows_flavour: downgrading "
1102 "lock to READ: offset = %ju, count = %ju\n",
1103 (uintmax_t)offset, (uintmax_t)count ));
1105 if (!posix_fcntl_lock(fsp,F_SETLK,offset,count,F_RDLCK)) {
1106 DEBUG(0,("release_posix_lock_windows_flavour: downgrade of lock failed with error %s !\n", strerror(errno) ));
1107 talloc_destroy(ul_ctx);
1108 return False;
1113 * Release the POSIX locks on the list of ranges returned.
1116 for(; ulist; ulist = ulist->next) {
1117 offset = ulist->start;
1118 count = ulist->size;
1120 DEBUG(5, ("release_posix_lock_windows_flavour: Real unlock: "
1121 "offset = %ju, count = %ju\n",
1122 (uintmax_t)offset, (uintmax_t)count ));
1124 if (!posix_fcntl_lock(fsp,F_SETLK,offset,count,F_UNLCK)) {
1125 ret = False;
1129 talloc_destroy(ul_ctx);
1130 return ret;
1133 /****************************************************************************
1134 Next - the functions that deal with mapping CIFS POSIX locks onto
1135 the underlying system POSIX locks.
1136 ****************************************************************************/
1138 /****************************************************************************
1139 We only increment the lock ref count when we see a POSIX lock on a context
1140 that doesn't already have them.
1141 ****************************************************************************/
1143 static void increment_posix_lock_count(const files_struct *fsp,
1144 uint64_t smblctx)
1146 NTSTATUS status;
1147 TDB_DATA ctx_key;
1148 TDB_DATA val = { 0 };
1150 ctx_key.dptr = (uint8_t *)&smblctx;
1151 ctx_key.dsize = sizeof(smblctx);
1154 * Don't increment if we already have any POSIX flavor
1155 * locks on this context.
1157 if (dbwrap_exists(posix_pending_close_db, ctx_key)) {
1158 return;
1161 /* Remember that we have POSIX flavor locks on this context. */
1162 status = dbwrap_store(posix_pending_close_db, ctx_key, val, 0);
1163 SMB_ASSERT(NT_STATUS_IS_OK(status));
1165 increment_lock_ref_count(fsp);
1167 DEBUG(10,("posix_locks set for file %s\n",
1168 fsp_str_dbg(fsp)));
1171 static void decrement_posix_lock_count(const files_struct *fsp, uint64_t smblctx)
1173 NTSTATUS status;
1174 TDB_DATA ctx_key;
1176 ctx_key.dptr = (uint8_t *)&smblctx;
1177 ctx_key.dsize = sizeof(smblctx);
1179 status = dbwrap_delete(posix_pending_close_db, ctx_key);
1180 SMB_ASSERT(NT_STATUS_IS_OK(status));
1182 decrement_lock_ref_count(fsp);
1184 DEBUG(10,("posix_locks deleted for file %s\n",
1185 fsp_str_dbg(fsp)));
1188 /****************************************************************************
1189 Return true if any locks exist on the given lock context.
1190 ****************************************************************************/
1192 static bool locks_exist_on_context(const struct lock_struct *plocks,
1193 int num_locks,
1194 const struct lock_context *lock_ctx)
1196 int i;
1198 for (i=0; i < num_locks; i++) {
1199 const struct lock_struct *lock = &plocks[i];
1201 /* Ignore all but read/write locks. */
1202 if (lock->lock_type != READ_LOCK && lock->lock_type != WRITE_LOCK) {
1203 continue;
1206 /* Ignore locks not owned by this process. */
1207 if (!server_id_equal(&lock->context.pid, &lock_ctx->pid)) {
1208 continue;
1211 if (lock_ctx->smblctx == lock->context.smblctx) {
1212 return true;
1215 return false;
1218 /****************************************************************************
1219 POSIX function to acquire a lock. Returns True if the
1220 lock could be granted, False if not.
1221 As POSIX locks don't stack or conflict (they just overwrite)
1222 we can map the requested lock directly onto a system one. We
1223 know it doesn't conflict with locks on other contexts as the
1224 upper layer would have refused it.
1225 ****************************************************************************/
1227 bool set_posix_lock_posix_flavour(files_struct *fsp,
1228 uint64_t u_offset,
1229 uint64_t u_count,
1230 enum brl_type lock_type,
1231 const struct lock_context *lock_ctx,
1232 int *errno_ret)
1234 off_t offset;
1235 off_t count;
1236 int posix_lock_type = map_posix_lock_type(fsp,lock_type);
1238 DEBUG(5,("set_posix_lock_posix_flavour: File %s, offset = %ju, count "
1239 "= %ju, type = %s\n", fsp_str_dbg(fsp),
1240 (uintmax_t)u_offset, (uintmax_t)u_count,
1241 posix_lock_type_name(lock_type)));
1244 * If the requested lock won't fit in the POSIX range, we will
1245 * pretend it was successful.
1248 if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
1249 increment_posix_lock_count(fsp, lock_ctx->smblctx);
1250 return True;
1253 if (!posix_fcntl_lock(fsp,F_SETLK,offset,count,posix_lock_type)) {
1254 *errno_ret = errno;
1255 DEBUG(5,("set_posix_lock_posix_flavour: Lock fail !: Type = %s: offset = %ju, count = %ju. Errno = %s\n",
1256 posix_lock_type_name(posix_lock_type), (intmax_t)offset, (intmax_t)count, strerror(errno) ));
1257 return False;
1259 increment_posix_lock_count(fsp, lock_ctx->smblctx);
1260 return True;
1263 /****************************************************************************
1264 POSIX function to release a lock. Returns True if the
1265 lock could be released, False if not.
1266 We are given a complete lock state from the upper layer which is what the lock
1267 state should be after the unlock has already been done, so what
1268 we do is punch out holes in the unlock range where locks owned by this process
1269 have a different lock context.
1270 ****************************************************************************/
1272 bool release_posix_lock_posix_flavour(files_struct *fsp,
1273 uint64_t u_offset,
1274 uint64_t u_count,
1275 const struct lock_context *lock_ctx,
1276 const struct lock_struct *plocks,
1277 int num_locks)
1279 bool ret = True;
1280 off_t offset;
1281 off_t count;
1282 TALLOC_CTX *ul_ctx = NULL;
1283 struct lock_list *ulist = NULL;
1284 struct lock_list *ul = NULL;
1286 DEBUG(5, ("release_posix_lock_posix_flavour: File %s, offset = %ju, "
1287 "count = %ju\n", fsp_str_dbg(fsp),
1288 (uintmax_t)u_offset, (uintmax_t)u_count));
1291 * If the requested lock won't fit in the POSIX range, we will
1292 * pretend it was successful.
1295 if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
1296 if (!locks_exist_on_context(plocks, num_locks, lock_ctx)) {
1297 decrement_posix_lock_count(fsp, lock_ctx->smblctx);
1299 return True;
1302 if ((ul_ctx = talloc_init("release_posix_lock")) == NULL) {
1303 DEBUG(0,("release_posix_lock_windows_flavour: unable to init talloc context.\n"));
1304 return False;
1307 if ((ul = talloc(ul_ctx, struct lock_list)) == NULL) {
1308 DEBUG(0,("release_posix_lock_windows_flavour: unable to talloc unlock list.\n"));
1309 talloc_destroy(ul_ctx);
1310 return False;
1314 * Create the initial list entry containing the
1315 * lock we want to remove.
1318 ZERO_STRUCTP(ul);
1319 ul->start = offset;
1320 ul->size = count;
1322 DLIST_ADD(ulist, ul);
1325 * Walk the given array creating a linked list
1326 * of unlock requests.
1329 ulist = posix_lock_list(ul_ctx,
1330 ulist,
1331 lock_ctx, /* Lock context ulist belongs to. */
1332 plocks,
1333 num_locks);
1336 * Release the POSIX locks on the list of ranges returned.
1339 for(; ulist; ulist = ulist->next) {
1340 offset = ulist->start;
1341 count = ulist->size;
1343 DEBUG(5, ("release_posix_lock_posix_flavour: Real unlock: "
1344 "offset = %ju, count = %ju\n",
1345 (uintmax_t)offset, (uintmax_t)count ));
1347 if (!posix_fcntl_lock(fsp,F_SETLK,offset,count,F_UNLCK)) {
1348 ret = False;
1352 if (!locks_exist_on_context(plocks, num_locks, lock_ctx)) {
1353 decrement_posix_lock_count(fsp, lock_ctx->smblctx);
1355 talloc_destroy(ul_ctx);
1356 return ret;