s3: Fix a comment
[Samba/gebeck_regimport.git] / source3 / locking / posix.c
blob02d9b6d3e3481d186d833097c2665fcc537fdceb
1 /*
2 Unix SMB/CIFS implementation.
3 Locking functions
4 Copyright (C) Jeremy Allison 1992-2006
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <http://www.gnu.org/licenses/>.
19 Revision History:
21 POSIX locking support. Jeremy Allison (jeremy@valinux.com), Apr. 2000.
24 #include "includes.h"
25 #include "system/filesys.h"
26 #include "locking/proto.h"
27 #include "dbwrap/dbwrap.h"
28 #include "dbwrap/dbwrap_rbt.h"
29 #include "util_tdb.h"
31 #undef DBGC_CLASS
32 #define DBGC_CLASS DBGC_LOCKING
35 * The pending close database handle.
38 static struct db_context *posix_pending_close_db;
40 /****************************************************************************
41 First - the functions that deal with the underlying system locks - these
42 functions are used no matter if we're mapping CIFS Windows locks or CIFS
43 POSIX locks onto POSIX.
44 ****************************************************************************/
46 /****************************************************************************
47 Utility function to map a lock type correctly depending on the open
48 mode of a file.
49 ****************************************************************************/
51 static int map_posix_lock_type( files_struct *fsp, enum brl_type lock_type)
53 if((lock_type == WRITE_LOCK) && !fsp->can_write) {
55 * Many UNIX's cannot get a write lock on a file opened read-only.
56 * Win32 locking semantics allow this.
57 * Do the best we can and attempt a read-only lock.
59 DEBUG(10,("map_posix_lock_type: Downgrading write lock to read due to read-only file.\n"));
60 return F_RDLCK;
64 * This return should be the most normal, as we attempt
65 * to always open files read/write.
68 return (lock_type == READ_LOCK) ? F_RDLCK : F_WRLCK;
71 /****************************************************************************
72 Debugging aid :-).
73 ****************************************************************************/
/*
 * Return a printable name for an fcntl() lock type, for debug output.
 * F_RDLCK yields "READ"; every other value yields "WRITE".
 */
static const char *posix_lock_type_name(int lock_type)
{
	if (lock_type == F_RDLCK) {
		return "READ";
	}
	return "WRITE";
}
80 /****************************************************************************
81 Check to see if the given unsigned lock range is within the possible POSIX
82 range. Modifies the given args to be in range if possible, just returns
83 False if not.
84 ****************************************************************************/
86 static bool posix_lock_in_range(off_t *offset_out, off_t *count_out,
87 uint64_t u_offset, uint64_t u_count)
89 off_t offset = (off_t)u_offset;
90 off_t count = (off_t)u_count;
93 * For the type of system we are, attempt to
94 * find the maximum positive lock offset as an off_t.
97 #if defined(MAX_POSITIVE_LOCK_OFFSET) /* Some systems have arbitrary limits. */
99 off_t max_positive_lock_offset = (MAX_POSITIVE_LOCK_OFFSET);
100 #else
102 * In this case off_t is 64 bits,
103 * and the underlying system can handle 64 bit signed locks.
106 off_t mask2 = ((off_t)0x4) << (SMB_OFF_T_BITS-4);
107 off_t mask = (mask2<<1);
108 off_t max_positive_lock_offset = ~mask;
110 #endif
112 * POSIX locks of length zero mean lock to end-of-file.
113 * Win32 locks of length zero are point probes. Ignore
114 * any Win32 locks of length zero. JRA.
117 if (count == (off_t)0) {
118 DEBUG(10,("posix_lock_in_range: count = 0, ignoring.\n"));
119 return False;
123 * If the given offset was > max_positive_lock_offset then we cannot map this at all
124 * ignore this lock.
127 if (u_offset & ~((uint64_t)max_positive_lock_offset)) {
128 DEBUG(10,("posix_lock_in_range: (offset = %.0f) offset > %.0f and we cannot handle this. Ignoring lock.\n",
129 (double)u_offset, (double)((uint64_t)max_positive_lock_offset) ));
130 return False;
134 * We must truncate the count to less than max_positive_lock_offset.
137 if (u_count & ~((uint64_t)max_positive_lock_offset)) {
138 count = max_positive_lock_offset;
142 * Truncate count to end at max lock offset.
145 if (offset + count < 0 || offset + count > max_positive_lock_offset) {
146 count = max_positive_lock_offset - offset;
150 * If we ate all the count, ignore this lock.
153 if (count == 0) {
154 DEBUG(10,("posix_lock_in_range: Count = 0. Ignoring lock u_offset = %.0f, u_count = %.0f\n",
155 (double)u_offset, (double)u_count ));
156 return False;
160 * The mapping was successful.
163 DEBUG(10,("posix_lock_in_range: offset_out = %.0f, count_out = %.0f\n",
164 (double)offset, (double)count ));
166 *offset_out = offset;
167 *count_out = count;
169 return True;
172 bool smb_vfs_call_lock(struct vfs_handle_struct *handle,
173 struct files_struct *fsp, int op, off_t offset,
174 off_t count, int type)
176 VFS_FIND(lock);
177 return handle->fns->lock_fn(handle, fsp, op, offset, count, type);
180 /****************************************************************************
181 Actual function that does POSIX locks. Copes with 64 -> 32 bit cruft and
182 broken NFS implementations.
183 ****************************************************************************/
185 static bool posix_fcntl_lock(files_struct *fsp, int op, off_t offset, off_t count, int type)
187 bool ret;
189 DEBUG(8,("posix_fcntl_lock %d %d %.0f %.0f %d\n",fsp->fh->fd,op,(double)offset,(double)count,type));
191 ret = SMB_VFS_LOCK(fsp, op, offset, count, type);
193 if (!ret && ((errno == EFBIG) || (errno == ENOLCK) || (errno == EINVAL))) {
195 DEBUG(0,("posix_fcntl_lock: WARNING: lock request at offset %.0f, length %.0f returned\n",
196 (double)offset,(double)count));
197 DEBUGADD(0,("an %s error. This can happen when using 64 bit lock offsets\n", strerror(errno)));
198 DEBUGADD(0,("on 32 bit NFS mounted file systems.\n"));
201 * If the offset is > 0x7FFFFFFF then this will cause problems on
202 * 32 bit NFS mounted filesystems. Just ignore it.
205 if (offset & ~((off_t)0x7fffffff)) {
206 DEBUG(0,("Offset greater than 31 bits. Returning success.\n"));
207 return True;
210 if (count & ~((off_t)0x7fffffff)) {
211 /* 32 bit NFS file system, retry with smaller offset */
212 DEBUG(0,("Count greater than 31 bits - retrying with 31 bit truncated length.\n"));
213 errno = 0;
214 count &= 0x7fffffff;
215 ret = SMB_VFS_LOCK(fsp, op, offset, count, type);
219 DEBUG(8,("posix_fcntl_lock: Lock call %s\n", ret ? "successful" : "failed"));
220 return ret;
223 bool smb_vfs_call_getlock(struct vfs_handle_struct *handle,
224 struct files_struct *fsp, off_t *poffset,
225 off_t *pcount, int *ptype, pid_t *ppid)
227 VFS_FIND(getlock);
228 return handle->fns->getlock_fn(handle, fsp, poffset, pcount, ptype,
229 ppid);
232 /****************************************************************************
233 Actual function that gets POSIX locks. Copes with 64 -> 32 bit cruft and
234 broken NFS implementations.
235 ****************************************************************************/
237 static bool posix_fcntl_getlock(files_struct *fsp, off_t *poffset, off_t *pcount, int *ptype)
239 pid_t pid;
240 bool ret;
242 DEBUG(8,("posix_fcntl_getlock %d %.0f %.0f %d\n",
243 fsp->fh->fd,(double)*poffset,(double)*pcount,*ptype));
245 ret = SMB_VFS_GETLOCK(fsp, poffset, pcount, ptype, &pid);
247 if (!ret && ((errno == EFBIG) || (errno == ENOLCK) || (errno == EINVAL))) {
249 DEBUG(0,("posix_fcntl_getlock: WARNING: lock request at offset %.0f, length %.0f returned\n",
250 (double)*poffset,(double)*pcount));
251 DEBUGADD(0,("an %s error. This can happen when using 64 bit lock offsets\n", strerror(errno)));
252 DEBUGADD(0,("on 32 bit NFS mounted file systems.\n"));
255 * If the offset is > 0x7FFFFFFF then this will cause problems on
256 * 32 bit NFS mounted filesystems. Just ignore it.
259 if (*poffset & ~((off_t)0x7fffffff)) {
260 DEBUG(0,("Offset greater than 31 bits. Returning success.\n"));
261 return True;
264 if (*pcount & ~((off_t)0x7fffffff)) {
265 /* 32 bit NFS file system, retry with smaller offset */
266 DEBUG(0,("Count greater than 31 bits - retrying with 31 bit truncated length.\n"));
267 errno = 0;
268 *pcount &= 0x7fffffff;
269 ret = SMB_VFS_GETLOCK(fsp,poffset,pcount,ptype,&pid);
273 DEBUG(8,("posix_fcntl_getlock: Lock query call %s\n", ret ? "successful" : "failed"));
274 return ret;
277 /****************************************************************************
278 POSIX function to see if a file region is locked. Returns True if the
279 region is locked, False otherwise.
280 ****************************************************************************/
282 bool is_posix_locked(files_struct *fsp,
283 uint64_t *pu_offset,
284 uint64_t *pu_count,
285 enum brl_type *plock_type,
286 enum brl_flavour lock_flav)
288 off_t offset;
289 off_t count;
290 int posix_lock_type = map_posix_lock_type(fsp,*plock_type);
292 DEBUG(10,("is_posix_locked: File %s, offset = %.0f, count = %.0f, "
293 "type = %s\n", fsp_str_dbg(fsp), (double)*pu_offset,
294 (double)*pu_count, posix_lock_type_name(*plock_type)));
297 * If the requested lock won't fit in the POSIX range, we will
298 * never set it, so presume it is not locked.
301 if(!posix_lock_in_range(&offset, &count, *pu_offset, *pu_count)) {
302 return False;
305 if (!posix_fcntl_getlock(fsp,&offset,&count,&posix_lock_type)) {
306 return False;
309 if (posix_lock_type == F_UNLCK) {
310 return False;
313 if (lock_flav == POSIX_LOCK) {
314 /* Only POSIX lock queries need to know the details. */
315 *pu_offset = (uint64_t)offset;
316 *pu_count = (uint64_t)count;
317 *plock_type = (posix_lock_type == F_RDLCK) ? READ_LOCK : WRITE_LOCK;
319 return True;
322 /****************************************************************************
323 Next - the functions that deal with in memory database storing representations
324 of either Windows CIFS locks or POSIX CIFS locks.
325 ****************************************************************************/
327 /* The key used in the in-memory POSIX databases. */
/*
 * Key for the lock reference count records kept in posix_pending_close_db.
 * The whole structure is used as the key bytes, so it is zeroed before the
 * fields are filled in (see locking_ref_count_key_fsp).
 */
329 struct lock_ref_count_key {
/* File id of the file the count belongs to (copied from fsp->file_id). */
330 struct file_id id;
/* Set to 'r' when the key is built; presumably distinguishes this key from the bare file_id key used for fd arrays - confirm. */
331 char r;
334 /*******************************************************************
335 Form a static locking key for a dev/inode pair for the lock ref count
336 ******************************************************************/
338 static TDB_DATA locking_ref_count_key_fsp(files_struct *fsp,
339 struct lock_ref_count_key *tmp)
341 ZERO_STRUCTP(tmp);
342 tmp->id = fsp->file_id;
343 tmp->r = 'r';
344 return make_tdb_data((uint8_t *)tmp, sizeof(*tmp));
347 /*******************************************************************
348 Convenience function to get an fd_array key from an fsp.
349 ******************************************************************/
351 static TDB_DATA fd_array_key_fsp(files_struct *fsp)
353 return make_tdb_data((uint8 *)&fsp->file_id, sizeof(fsp->file_id));
356 /*******************************************************************
357 Create the in-memory POSIX lock databases.
358 ********************************************************************/
360 bool posix_locking_init(bool read_only)
362 if (posix_pending_close_db != NULL) {
363 return true;
366 posix_pending_close_db = db_open_rbt(NULL);
368 if (posix_pending_close_db == NULL) {
369 DEBUG(0,("Failed to open POSIX pending close database.\n"));
370 return false;
373 return true;
376 /*******************************************************************
377 Delete the in-memory POSIX lock databases.
378 ********************************************************************/
380 bool posix_locking_end(void)
383 * Shouldn't we close all fd's here?
385 TALLOC_FREE(posix_pending_close_db);
386 return true;
389 /****************************************************************************
390 Next - the functions that deal with storing fd's that have outstanding
391 POSIX locks when closed.
392 ****************************************************************************/
394 /****************************************************************************
395 The records in posix_pending_close_db are composed of an array of
396 ints keyed by dev/ino pair. Those ints are the fd's that were open on
397 this dev/ino pair that should have been closed, but can't as the lock
398 ref count is non zero.
399 ****************************************************************************/
401 /****************************************************************************
402 Keep a reference count of the number of Windows locks open on this dev/ino
403 pair. Creates entry if it doesn't exist.
404 ****************************************************************************/
406 static void increment_windows_lock_ref_count(files_struct *fsp)
408 struct lock_ref_count_key tmp;
409 struct db_record *rec;
410 int lock_ref_count = 0;
411 NTSTATUS status;
412 TDB_DATA value;
414 rec = dbwrap_fetch_locked(
415 posix_pending_close_db, talloc_tos(),
416 locking_ref_count_key_fsp(fsp, &tmp));
418 SMB_ASSERT(rec != NULL);
420 value = dbwrap_record_get_value(rec);
422 if (value.dptr != NULL) {
423 SMB_ASSERT(value.dsize == sizeof(lock_ref_count));
424 memcpy(&lock_ref_count, value.dptr,
425 sizeof(lock_ref_count));
428 lock_ref_count++;
430 status = dbwrap_record_store(rec,
431 make_tdb_data((uint8 *)&lock_ref_count,
432 sizeof(lock_ref_count)), 0);
434 SMB_ASSERT(NT_STATUS_IS_OK(status));
436 TALLOC_FREE(rec);
438 DEBUG(10,("increment_windows_lock_ref_count for file now %s = %d\n",
439 fsp_str_dbg(fsp), lock_ref_count));
442 /****************************************************************************
443 Bulk delete - subtract as many locks as we've just deleted.
444 ****************************************************************************/
446 void reduce_windows_lock_ref_count(files_struct *fsp, unsigned int dcount)
448 struct lock_ref_count_key tmp;
449 struct db_record *rec;
450 int lock_ref_count = 0;
451 NTSTATUS status;
452 TDB_DATA value;
454 rec = dbwrap_fetch_locked(
455 posix_pending_close_db, talloc_tos(),
456 locking_ref_count_key_fsp(fsp, &tmp));
458 if (rec == NULL) {
459 DEBUG(0, ("reduce_windows_lock_ref_count: rec not found\n"));
460 return;
463 value = dbwrap_record_get_value(rec);
465 if ((value.dptr == NULL) || (value.dsize != sizeof(lock_ref_count))) {
466 DEBUG(0, ("reduce_windows_lock_ref_count: wrong value\n"));
467 TALLOC_FREE(rec);
468 return;
471 memcpy(&lock_ref_count, value.dptr, sizeof(lock_ref_count));
473 SMB_ASSERT(lock_ref_count > 0);
475 lock_ref_count -= dcount;
477 status = dbwrap_record_store(rec,
478 make_tdb_data((uint8 *)&lock_ref_count,
479 sizeof(lock_ref_count)), 0);
481 SMB_ASSERT(NT_STATUS_IS_OK(status));
483 TALLOC_FREE(rec);
485 DEBUG(10,("reduce_windows_lock_ref_count for file now %s = %d\n",
486 fsp_str_dbg(fsp), lock_ref_count));
489 static void decrement_windows_lock_ref_count(files_struct *fsp)
491 reduce_windows_lock_ref_count(fsp, 1);
494 /****************************************************************************
495 Fetch the lock ref count.
496 ****************************************************************************/
498 static int get_windows_lock_ref_count(files_struct *fsp)
500 struct lock_ref_count_key tmp;
501 TDB_DATA dbuf;
502 NTSTATUS status;
503 int lock_ref_count = 0;
505 status = dbwrap_fetch(
506 posix_pending_close_db, talloc_tos(),
507 locking_ref_count_key_fsp(fsp, &tmp), &dbuf);
509 if (NT_STATUS_EQUAL(status, NT_STATUS_NOT_FOUND)) {
510 goto done;
513 if (!NT_STATUS_IS_OK(status)) {
514 DEBUG(0, ("get_windows_lock_ref_count: Error fetching "
515 "lock ref count for file %s: %s\n",
516 fsp_str_dbg(fsp), nt_errstr(status)));
517 goto done;
520 if (dbuf.dsize != sizeof(lock_ref_count)) {
521 DEBUG(0, ("get_windows_lock_ref_count: invalid entry "
522 "in lock ref count record for file %s: "
523 "(invalid data size %u)\n",
524 fsp_str_dbg(fsp), (unsigned int)dbuf.dsize));
525 goto done;
528 memcpy(&lock_ref_count, dbuf.dptr, sizeof(lock_ref_count));
529 TALLOC_FREE(dbuf.dptr);
531 done:
532 DEBUG(10,("get_windows_lock_count for file %s = %d\n",
533 fsp_str_dbg(fsp), lock_ref_count));
535 return lock_ref_count;
538 /****************************************************************************
539 Delete a lock_ref_count entry.
540 ****************************************************************************/
542 static void delete_windows_lock_ref_count(files_struct *fsp)
544 struct lock_ref_count_key tmp;
545 struct db_record *rec;
547 rec = dbwrap_fetch_locked(
548 posix_pending_close_db, talloc_tos(),
549 locking_ref_count_key_fsp(fsp, &tmp));
551 SMB_ASSERT(rec != NULL);
553 /* Not a bug if it doesn't exist - no locks were ever granted. */
555 dbwrap_record_delete(rec);
556 TALLOC_FREE(rec);
558 DEBUG(10,("delete_windows_lock_ref_count for file %s\n",
559 fsp_str_dbg(fsp)));
562 /****************************************************************************
563 Add an fd to the pending close tdb.
564 ****************************************************************************/
566 static void add_fd_to_close_entry(files_struct *fsp)
568 struct db_record *rec;
569 int *fds;
570 size_t num_fds;
571 NTSTATUS status;
572 TDB_DATA value;
574 rec = dbwrap_fetch_locked(
575 posix_pending_close_db, talloc_tos(),
576 fd_array_key_fsp(fsp));
578 SMB_ASSERT(rec != NULL);
580 value = dbwrap_record_get_value(rec);
581 SMB_ASSERT((value.dsize % sizeof(int)) == 0);
583 num_fds = value.dsize / sizeof(int);
584 fds = talloc_array(rec, int, num_fds+1);
586 SMB_ASSERT(fds != NULL);
588 memcpy(fds, value.dptr, value.dsize);
589 fds[num_fds] = fsp->fh->fd;
591 status = dbwrap_record_store(
592 rec, make_tdb_data((uint8_t *)fds, talloc_get_size(fds)), 0);
594 SMB_ASSERT(NT_STATUS_IS_OK(status));
596 TALLOC_FREE(rec);
598 DEBUG(10,("add_fd_to_close_entry: added fd %d file %s\n",
599 fsp->fh->fd, fsp_str_dbg(fsp)));
602 /****************************************************************************
603 Remove all fd entries for a specific dev/inode pair from the tdb.
604 ****************************************************************************/
606 static void delete_close_entries(files_struct *fsp)
608 struct db_record *rec;
610 rec = dbwrap_fetch_locked(
611 posix_pending_close_db, talloc_tos(),
612 fd_array_key_fsp(fsp));
614 SMB_ASSERT(rec != NULL);
615 dbwrap_record_delete(rec);
616 TALLOC_FREE(rec);
619 /****************************************************************************
620 Get the array of POSIX pending close records for an open fsp. Returns number
621 of entries.
622 ****************************************************************************/
624 static size_t get_posix_pending_close_entries(TALLOC_CTX *mem_ctx,
625 files_struct *fsp, int **entries)
627 TDB_DATA dbuf;
628 NTSTATUS status;
630 status = dbwrap_fetch(
631 posix_pending_close_db, mem_ctx, fd_array_key_fsp(fsp),
632 &dbuf);
634 if (NT_STATUS_EQUAL(status, NT_STATUS_NOT_FOUND)) {
635 *entries = NULL;
636 return 0;
639 SMB_ASSERT(NT_STATUS_IS_OK(status));
641 if (dbuf.dsize == 0) {
642 *entries = NULL;
643 return 0;
646 *entries = (int *)dbuf.dptr;
647 return (size_t)(dbuf.dsize / sizeof(int));
650 /****************************************************************************
651 Deal with pending closes needed by POSIX locking support.
652 Note that posix_locking_close_file() is expected to have been called
653 to delete all locks on this fsp before this function is called.
654 ****************************************************************************/
656 int fd_close_posix(struct files_struct *fsp)
658 int saved_errno = 0;
659 int ret;
660 int *fd_array = NULL;
661 size_t count, i;
663 if (!lp_locking(fsp->conn->params) ||
664 !lp_posix_locking(fsp->conn->params))
667 * No locking or POSIX to worry about or we want POSIX semantics
668 * which will lose all locks on all fd's open on this dev/inode,
669 * just close.
671 return close(fsp->fh->fd);
674 if (get_windows_lock_ref_count(fsp)) {
677 * There are outstanding locks on this dev/inode pair on
678 * other fds. Add our fd to the pending close tdb and set
679 * fsp->fh->fd to -1.
682 add_fd_to_close_entry(fsp);
683 return 0;
687 * No outstanding locks. Get the pending close fd's
688 * from the tdb and close them all.
691 count = get_posix_pending_close_entries(talloc_tos(), fsp, &fd_array);
693 if (count) {
694 DEBUG(10,("fd_close_posix: doing close on %u fd's.\n",
695 (unsigned int)count));
697 for(i = 0; i < count; i++) {
698 if (close(fd_array[i]) == -1) {
699 saved_errno = errno;
704 * Delete all fd's stored in the tdb
705 * for this dev/inode pair.
708 delete_close_entries(fsp);
711 TALLOC_FREE(fd_array);
713 /* Don't need a lock ref count on this dev/ino anymore. */
714 delete_windows_lock_ref_count(fsp);
717 * Finally close the fd associated with this fsp.
720 ret = close(fsp->fh->fd);
722 if (ret == 0 && saved_errno != 0) {
723 errno = saved_errno;
724 ret = -1;
727 return ret;
730 /****************************************************************************
731 Next - the functions that deal with the mapping CIFS Windows locks onto
732 the underlying system POSIX locks.
733 ****************************************************************************/
736 * Structure used when splitting a lock range
737 * into a POSIX lock range. Doubly linked list.
/* One byte range in a doubly linked list of ranges (start, size as off_t). */
740 struct lock_list {
/* Neighbouring list entries; maintained through the DLIST_* macros. */
741 struct lock_list *next;
742 struct lock_list *prev;
/* First byte of the range. */
743 off_t start;
/* Length of the range in bytes. */
744 off_t size;
747 /****************************************************************************
748 Create a list of lock ranges that don't overlap a given range. Used in calculating
749 POSIX locks and unlocks. This is a difficult function that requires ASCII art to
750 understand it :-).
751 ****************************************************************************/
/*
 * Remove from the range list `lhead` every part that overlaps an existing
 * READ_LOCK/WRITE_LOCK in `plocks` whose context pid equals lock_ctx->pid.
 *
 * ctx       - talloc context used for list entries created by a split.
 * lhead     - list of ranges to trim; dereferenced unconditionally for
 *             the first debug line, so it must not be NULL on entry.
 * lock_ctx  - lock context the ranges in lhead belong to.
 * fsp       - not referenced in this function body.
 * plocks    - array of existing locks to check against.
 * num_locks - number of entries in plocks.
 *
 * Returns the (possibly changed) list head. NULL is returned both when
 * every range was removed and when a talloc for a split fails, so callers
 * cannot tell these two cases apart from the return value alone.
 */
753 static struct lock_list *posix_lock_list(TALLOC_CTX *ctx,
754 struct lock_list *lhead,
755 const struct lock_context *lock_ctx, /* Lock context lhead belongs to. */
756 files_struct *fsp,
757 const struct lock_struct *plocks,
758 int num_locks)
760 int i;
763 * Check the current lock list on this dev/inode pair.
764 * Quit if the list is deleted.
/* Note: lhead is dereferenced here before the loop's own lhead check. */
767 DEBUG(10,("posix_lock_list: curr: start=%.0f,size=%.0f\n",
768 (double)lhead->start, (double)lhead->size ));
770 for (i=0; i<num_locks && lhead; i++) {
771 const struct lock_struct *lock = &plocks[i];
772 struct lock_list *l_curr;
774 /* Ignore all but read/write locks. */
775 if (lock->lock_type != READ_LOCK && lock->lock_type != WRITE_LOCK) {
776 continue;
779 /* Ignore locks not owned by this process. */
780 if (!procid_equal(&lock->context.pid, &lock_ctx->pid)) {
781 continue;
785 * Walk the lock list, checking for overlaps. Note that
786 * the lock list can expand within this loop if the current
787 * range being examined needs to be split.
790 for (l_curr = lhead; l_curr;) {
792 DEBUG(10,("posix_lock_list: lock: fnum=%llu: start=%.0f,size=%.0f:type=%s",
793 (unsigned long long)lock->fnum,
794 (double)lock->start, (double)lock->size, posix_lock_type_name(lock->lock_type) ));
796 if ( (l_curr->start >= (lock->start + lock->size)) ||
797 (lock->start >= (l_curr->start + l_curr->size))) {
799 /* No overlap with existing lock - leave this range alone. */
800 /*********************************************
801 +---------+
802 | l_curr |
803 +---------+
804 +-------+
805 | lock |
806 +-------+
807 OR....
808 +---------+
809 | l_curr |
810 +---------+
811 **********************************************/
813 DEBUG(10,(" no overlap case.\n" ));
815 l_curr = l_curr->next;
817 } else if ( (l_curr->start >= lock->start) &&
818 (l_curr->start + l_curr->size <= lock->start + lock->size) ) {
821 * This range is completely overlapped by this existing lock range
822 * and thus should have no effect. Delete it from the list.
824 /*********************************************
825 +---------+
826 | l_curr |
827 +---------+
828 +---------------------------+
829 | lock |
830 +---------------------------+
831 **********************************************/
832 /* Save the next pointer */
833 struct lock_list *ul_next = l_curr->next;
835 DEBUG(10,(" delete case.\n" ));
837 DLIST_REMOVE(lhead, l_curr);
838 if(lhead == NULL) {
839 break; /* No more list... */
842 l_curr = ul_next;
844 } else if ( (l_curr->start >= lock->start) &&
845 (l_curr->start < lock->start + lock->size) &&
846 (l_curr->start + l_curr->size > lock->start + lock->size) ) {
849 * This range overlaps the existing lock range at the high end.
850 * Truncate by moving start to existing range end and reducing size.
852 /*********************************************
853 +---------------+
854 | l_curr |
855 +---------------+
856 +---------------+
857 | lock |
858 +---------------+
859 BECOMES....
860 +-------+
861 | l_curr|
862 +-------+
863 **********************************************/
865 l_curr->size = (l_curr->start + l_curr->size) - (lock->start + lock->size);
866 l_curr->start = lock->start + lock->size;
868 DEBUG(10,(" truncate high case: start=%.0f,size=%.0f\n",
869 (double)l_curr->start, (double)l_curr->size ));
871 l_curr = l_curr->next;
873 } else if ( (l_curr->start < lock->start) &&
874 (l_curr->start + l_curr->size > lock->start) &&
875 (l_curr->start + l_curr->size <= lock->start + lock->size) ) {
878 * This range overlaps the existing lock range at the low end.
879 * Truncate by reducing size.
881 /*********************************************
882 +---------------+
883 | l_curr |
884 +---------------+
885 +---------------+
886 | lock |
887 +---------------+
888 BECOMES....
889 +-------+
890 | l_curr|
891 +-------+
892 **********************************************/
894 l_curr->size = lock->start - l_curr->start;
896 DEBUG(10,(" truncate low case: start=%.0f,size=%.0f\n",
897 (double)l_curr->start, (double)l_curr->size ));
899 l_curr = l_curr->next;
901 } else if ( (l_curr->start < lock->start) &&
902 (l_curr->start + l_curr->size > lock->start + lock->size) ) {
904 * Worst case scenario. Range completely overlaps an existing
905 * lock range. Split the request into two, push the new (upper) request
906 * into the dlink list, and continue with the entry after l_new (as we
907 * know that l_new will not overlap with this lock).
909 /*********************************************
910 +---------------------------+
911 | l_curr |
912 +---------------------------+
913 +---------+
914 | lock |
915 +---------+
916 BECOMES.....
917 +-------+ +---------+
918 | l_curr| | l_new |
919 +-------+ +---------+
920 **********************************************/
/* The new upper range is allocated on ctx, the caller-supplied context. */
921 struct lock_list *l_new = talloc(ctx, struct lock_list);
923 if(l_new == NULL) {
924 DEBUG(0,("posix_lock_list: talloc fail.\n"));
925 return NULL; /* The talloc_destroy takes care of cleanup. */
928 ZERO_STRUCTP(l_new);
929 l_new->start = lock->start + lock->size;
930 l_new->size = l_curr->start + l_curr->size - l_new->start;
932 /* Truncate the l_curr. */
933 l_curr->size = lock->start - l_curr->start;
935 DEBUG(10,(" split case: curr: start=%.0f,size=%.0f \
936 new: start=%.0f,size=%.0f\n", (double)l_curr->start, (double)l_curr->size,
937 (double)l_new->start, (double)l_new->size ));
940 * Add into the dlink list after the l_curr point - NOT at lhead.
942 DLIST_ADD_AFTER(lhead, l_new, l_curr);
944 /* And move after the link we added. */
945 l_curr = l_new->next;
947 } else {
950 * This logic case should never happen. Ensure this is the
951 * case by forcing an abort.... Remove in production.
953 char *msg = NULL;
955 if (asprintf(&msg, "logic flaw in cases: l_curr: start = %.0f, size = %.0f : \
956 lock: start = %.0f, size = %.0f", (double)l_curr->start, (double)l_curr->size, (double)lock->start, (double)lock->size ) != -1) {
957 smb_panic(msg);
958 } else {
959 smb_panic("posix_lock_list");
962 } /* end for ( l_curr = lhead; l_curr;) */
963 } /* end for (i=0; i<num_locks && ul_head; i++) */
965 return lhead;
968 /****************************************************************************
969 POSIX function to acquire a lock. Returns True if the
970 lock could be granted, False if not.
971 ****************************************************************************/
973 bool set_posix_lock_windows_flavour(files_struct *fsp,
974 uint64_t u_offset,
975 uint64_t u_count,
976 enum brl_type lock_type,
977 const struct lock_context *lock_ctx,
978 const struct lock_struct *plocks,
979 int num_locks,
980 int *errno_ret)
982 off_t offset;
983 off_t count;
984 int posix_lock_type = map_posix_lock_type(fsp,lock_type);
985 bool ret = True;
986 size_t lock_count;
987 TALLOC_CTX *l_ctx = NULL;
988 struct lock_list *llist = NULL;
989 struct lock_list *ll = NULL;
991 DEBUG(5,("set_posix_lock_windows_flavour: File %s, offset = %.0f, "
992 "count = %.0f, type = %s\n", fsp_str_dbg(fsp),
993 (double)u_offset, (double)u_count,
994 posix_lock_type_name(lock_type)));
997 * If the requested lock won't fit in the POSIX range, we will
998 * pretend it was successful.
1001 if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
1002 increment_windows_lock_ref_count(fsp);
1003 return True;
1007 * Windows is very strange. It allows read locks to be overlayed
1008 * (even over a write lock), but leaves the write lock in force until the first
1009 * unlock. It also reference counts the locks. This means the following sequence :
1011 * process1 process2
1012 * ------------------------------------------------------------------------
1013 * WRITE LOCK : start = 2, len = 10
1014 * READ LOCK: start =0, len = 10 - FAIL
1015 * READ LOCK : start = 0, len = 14
1016 * READ LOCK: start =0, len = 10 - FAIL
1017 * UNLOCK : start = 2, len = 10
1018 * READ LOCK: start =0, len = 10 - OK
1020 * Under POSIX, the same sequence in steps 1 and 2 would not be reference counted, but
1021 * would leave a single read lock over the 0-14 region.
1024 if ((l_ctx = talloc_init("set_posix_lock")) == NULL) {
1025 DEBUG(0,("set_posix_lock_windows_flavour: unable to init talloc context.\n"));
1026 return False;
1029 if ((ll = talloc(l_ctx, struct lock_list)) == NULL) {
1030 DEBUG(0,("set_posix_lock_windows_flavour: unable to talloc unlock list.\n"));
1031 talloc_destroy(l_ctx);
1032 return False;
1036 * Create the initial list entry containing the
1037 * lock we want to add.
1040 ZERO_STRUCTP(ll);
1041 ll->start = offset;
1042 ll->size = count;
1044 DLIST_ADD(llist, ll);
1047 * The following call calculates if there are any
1048 * overlapping locks held by this process on
1049 * fd's open on the same file and splits this list
1050 * into a list of lock ranges that do not overlap with existing
1051 * POSIX locks.
1054 llist = posix_lock_list(l_ctx,
1055 llist,
1056 lock_ctx, /* Lock context llist belongs to. */
1057 fsp,
1058 plocks,
1059 num_locks);
1062 * Add the POSIX locks on the list of ranges returned.
1063 * As the lock is supposed to be added atomically, we need to
1064 * back out all the locks if any one of these calls fail.
1067 for (lock_count = 0, ll = llist; ll; ll = ll->next, lock_count++) {
1068 offset = ll->start;
1069 count = ll->size;
1071 DEBUG(5,("set_posix_lock_windows_flavour: Real lock: Type = %s: offset = %.0f, count = %.0f\n",
1072 posix_lock_type_name(posix_lock_type), (double)offset, (double)count ));
1074 if (!posix_fcntl_lock(fsp,F_SETLK,offset,count,posix_lock_type)) {
1075 *errno_ret = errno;
1076 DEBUG(5,("set_posix_lock_windows_flavour: Lock fail !: Type = %s: offset = %.0f, count = %.0f. Errno = %s\n",
1077 posix_lock_type_name(posix_lock_type), (double)offset, (double)count, strerror(errno) ));
1078 ret = False;
1079 break;
1083 if (!ret) {
1086 * Back out all the POSIX locks we have on fail.
1089 for (ll = llist; lock_count; ll = ll->next, lock_count--) {
1090 offset = ll->start;
1091 count = ll->size;
1093 DEBUG(5,("set_posix_lock_windows_flavour: Backing out locks: Type = %s: offset = %.0f, count = %.0f\n",
1094 posix_lock_type_name(posix_lock_type), (double)offset, (double)count ));
1096 posix_fcntl_lock(fsp,F_SETLK,offset,count,F_UNLCK);
1098 } else {
1099 /* Remember the number of Windows locks we have on this dev/ino pair. */
1100 increment_windows_lock_ref_count(fsp);
1103 talloc_destroy(l_ctx);
1104 return ret;
1107 /****************************************************************************
1108 POSIX function to release a lock. Returns True if the
1109 lock could be released, False if not.
1110 ****************************************************************************/
1112 bool release_posix_lock_windows_flavour(files_struct *fsp,
1113 uint64_t u_offset,
1114 uint64_t u_count,
1115 enum brl_type deleted_lock_type,
1116 const struct lock_context *lock_ctx,
1117 const struct lock_struct *plocks,
1118 int num_locks)
1120 off_t offset;
1121 off_t count;
1122 bool ret = True;
1123 TALLOC_CTX *ul_ctx = NULL;
1124 struct lock_list *ulist = NULL;
1125 struct lock_list *ul = NULL;
1127 DEBUG(5,("release_posix_lock_windows_flavour: File %s, offset = %.0f, "
1128 "count = %.0f\n", fsp_str_dbg(fsp),
1129 (double)u_offset, (double)u_count));
1131 /* Remember the number of Windows locks we have on this dev/ino pair. */
1132 decrement_windows_lock_ref_count(fsp);
1135 * If the requested lock won't fit in the POSIX range, we will
1136 * pretend it was successful.
1139 if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
1140 return True;
1143 if ((ul_ctx = talloc_init("release_posix_lock")) == NULL) {
1144 DEBUG(0,("release_posix_lock_windows_flavour: unable to init talloc context.\n"));
1145 return False;
1148 if ((ul = talloc(ul_ctx, struct lock_list)) == NULL) {
1149 DEBUG(0,("release_posix_lock_windows_flavour: unable to talloc unlock list.\n"));
1150 talloc_destroy(ul_ctx);
1151 return False;
1155 * Create the initial list entry containing the
1156 * lock we want to remove.
1159 ZERO_STRUCTP(ul);
1160 ul->start = offset;
1161 ul->size = count;
1163 DLIST_ADD(ulist, ul);
1166 * The following call calculates if there are any
1167 * overlapping locks held by this process on
1168 * fd's open on the same file and creates a
1169 * list of unlock ranges that will allow
1170 * POSIX lock ranges to remain on the file whilst the
1171 * unlocks are performed.
1174 ulist = posix_lock_list(ul_ctx,
1175 ulist,
1176 lock_ctx, /* Lock context ulist belongs to. */
1177 fsp,
1178 plocks,
1179 num_locks);
1182 * If there were any overlapped entries (list is > 1 or size or start have changed),
1183 * and the lock_type we just deleted from
1184 * the upper layer tdb was a write lock, then before doing the unlock we need to downgrade
1185 * the POSIX lock to a read lock. This allows any overlapping read locks
1186 * to be atomically maintained.
1189 if (deleted_lock_type == WRITE_LOCK &&
1190 (!ulist || ulist->next != NULL || ulist->start != offset || ulist->size != count)) {
1192 DEBUG(5,("release_posix_lock_windows_flavour: downgrading lock to READ: offset = %.0f, count = %.0f\n",
1193 (double)offset, (double)count ));
1195 if (!posix_fcntl_lock(fsp,F_SETLK,offset,count,F_RDLCK)) {
1196 DEBUG(0,("release_posix_lock_windows_flavour: downgrade of lock failed with error %s !\n", strerror(errno) ));
1197 talloc_destroy(ul_ctx);
1198 return False;
1203 * Release the POSIX locks on the list of ranges returned.
1206 for(; ulist; ulist = ulist->next) {
1207 offset = ulist->start;
1208 count = ulist->size;
1210 DEBUG(5,("release_posix_lock_windows_flavour: Real unlock: offset = %.0f, count = %.0f\n",
1211 (double)offset, (double)count ));
1213 if (!posix_fcntl_lock(fsp,F_SETLK,offset,count,F_UNLCK)) {
1214 ret = False;
1218 talloc_destroy(ul_ctx);
1219 return ret;
1222 /****************************************************************************
1223 Next - the functions that deal with mapping CIFS POSIX locks onto
1224 the underlying system POSIX locks.
1225 ****************************************************************************/
1227 /****************************************************************************
1228 POSIX function to acquire a lock. Returns True if the
1229 lock could be granted, False if not.
1230 As POSIX locks don't stack or conflict (they just overwrite)
1231 we can map the requested lock directly onto a system one. We
1232 know it doesn't conflict with locks on other contexts as the
1233 upper layer would have refused it.
1234 ****************************************************************************/
1236 bool set_posix_lock_posix_flavour(files_struct *fsp,
1237 uint64_t u_offset,
1238 uint64_t u_count,
1239 enum brl_type lock_type,
1240 int *errno_ret)
1242 off_t offset;
1243 off_t count;
1244 int posix_lock_type = map_posix_lock_type(fsp,lock_type);
1246 DEBUG(5,("set_posix_lock_posix_flavour: File %s, offset = %.0f, count "
1247 "= %.0f, type = %s\n", fsp_str_dbg(fsp),
1248 (double)u_offset, (double)u_count,
1249 posix_lock_type_name(lock_type)));
1252 * If the requested lock won't fit in the POSIX range, we will
1253 * pretend it was successful.
1256 if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
1257 return True;
1260 if (!posix_fcntl_lock(fsp,F_SETLK,offset,count,posix_lock_type)) {
1261 *errno_ret = errno;
1262 DEBUG(5,("set_posix_lock_posix_flavour: Lock fail !: Type = %s: offset = %.0f, count = %.0f. Errno = %s\n",
1263 posix_lock_type_name(posix_lock_type), (double)offset, (double)count, strerror(errno) ));
1264 return False;
1266 return True;
1269 /****************************************************************************
1270 POSIX function to release a lock. Returns True if the
1271 lock could be released, False if not.
1272 We are given a complete lock state from the upper layer which is what the lock
1273 state should be after the unlock has already been done, so what
1274 we do is punch out holes in the unlock range where locks owned by this process
1275 have a different lock context.
1276 ****************************************************************************/
1278 bool release_posix_lock_posix_flavour(files_struct *fsp,
1279 uint64_t u_offset,
1280 uint64_t u_count,
1281 const struct lock_context *lock_ctx,
1282 const struct lock_struct *plocks,
1283 int num_locks)
1285 bool ret = True;
1286 off_t offset;
1287 off_t count;
1288 TALLOC_CTX *ul_ctx = NULL;
1289 struct lock_list *ulist = NULL;
1290 struct lock_list *ul = NULL;
1292 DEBUG(5,("release_posix_lock_posix_flavour: File %s, offset = %.0f, "
1293 "count = %.0f\n", fsp_str_dbg(fsp),
1294 (double)u_offset, (double)u_count));
1297 * If the requested lock won't fit in the POSIX range, we will
1298 * pretend it was successful.
1301 if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
1302 return True;
1305 if ((ul_ctx = talloc_init("release_posix_lock")) == NULL) {
1306 DEBUG(0,("release_posix_lock_windows_flavour: unable to init talloc context.\n"));
1307 return False;
1310 if ((ul = talloc(ul_ctx, struct lock_list)) == NULL) {
1311 DEBUG(0,("release_posix_lock_windows_flavour: unable to talloc unlock list.\n"));
1312 talloc_destroy(ul_ctx);
1313 return False;
1317 * Create the initial list entry containing the
1318 * lock we want to remove.
1321 ZERO_STRUCTP(ul);
1322 ul->start = offset;
1323 ul->size = count;
1325 DLIST_ADD(ulist, ul);
1328 * Walk the given array creating a linked list
1329 * of unlock requests.
1332 ulist = posix_lock_list(ul_ctx,
1333 ulist,
1334 lock_ctx, /* Lock context ulist belongs to. */
1335 fsp,
1336 plocks,
1337 num_locks);
1340 * Release the POSIX locks on the list of ranges returned.
1343 for(; ulist; ulist = ulist->next) {
1344 offset = ulist->start;
1345 count = ulist->size;
1347 DEBUG(5,("release_posix_lock_posix_flavour: Real unlock: offset = %.0f, count = %.0f\n",
1348 (double)offset, (double)count ));
1350 if (!posix_fcntl_lock(fsp,F_SETLK,offset,count,F_UNLCK)) {
1351 ret = False;
1355 talloc_destroy(ul_ctx);
1356 return ret;