/* source/locking/posix.c */
/*
   Unix SMB/Netbios implementation.
   Version 3.0
   Locking functions
   Copyright (C) Jeremy Allison 1992-2000

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

   Revision History:

   POSIX locking support. Jeremy Allison (jeremy@valinux.com), Apr. 2000.
*/

#include "includes.h"

/*
 * The POSIX locking database handle.
 */

static TDB_CONTEXT *posix_lock_tdb;

/*
 * The pending close database handle.
 */

static TDB_CONTEXT *posix_pending_close_tdb;

/*
 * The data in POSIX lock records is an unsorted linear array of these
 * records. It is unnecessary to store the count as tdb provides the
 * size of the record.
 */

struct posix_lock {
	int fd;
	SMB_OFF_T start;
	SMB_OFF_T size;
	int lock_type;
};

/*
 * The data in POSIX pending close records is an unsorted linear array of int
 * records. It is unnecessary to store the count as tdb provides the
 * size of the record.
 */

/* The key used in both the POSIX databases. */

struct posix_lock_key {
	SMB_DEV_T device;
	SMB_INO_T inode;
};

/*******************************************************************
 Form a static locking key for a dev/inode pair.
******************************************************************/

static TDB_DATA locking_key(SMB_DEV_T dev, SMB_INO_T inode)
{
	static struct posix_lock_key key;
	TDB_DATA kbuf;
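
	/*
	 * (Added note) tdb treats keys as opaque byte strings, so the memset
	 * below zeroes any padding inside the key struct; otherwise two keys
	 * for the same dev/inode pair could compare unequal.
	 */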
	memset(&key, '\0', sizeof(key));
	key.device = dev;
	key.inode = inode;
	kbuf.dptr = (char *)&key;
	kbuf.dsize = sizeof(key);
	return kbuf;
}

/*******************************************************************
 Convenience function to get a key from an fsp.
******************************************************************/

static TDB_DATA locking_key_fsp(files_struct *fsp)
{
	return locking_key(fsp->dev, fsp->inode);
}

/****************************************************************************
 Add an fd to the pending close tdb.
****************************************************************************/

static BOOL add_fd_to_close_entry(files_struct *fsp)
{
	TDB_DATA kbuf = locking_key_fsp(fsp);
	TDB_DATA dbuf;
	char *tp;

	dbuf.dptr = NULL;

	dbuf = tdb_fetch(posix_pending_close_tdb, kbuf);
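
	/*
	 * (Added note) If no pending close record exists yet, tdb_fetch()
	 * returns a NULL dptr with dsize 0, so the Realloc() below simply
	 * allocates space for the first fd rather than growing an array.
	 */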
	tp = Realloc(dbuf.dptr, dbuf.dsize + sizeof(int));
	if (!tp) {
		DEBUG(0,("add_fd_to_close_entry: Realloc fail !\n"));
		SAFE_FREE(dbuf.dptr);
		return False;
	} else
		dbuf.dptr = tp;

	memcpy(dbuf.dptr + dbuf.dsize, &fsp->fd, sizeof(int));
	dbuf.dsize += sizeof(int);

	if (tdb_store(posix_pending_close_tdb, kbuf, dbuf, TDB_REPLACE) == -1) {
		DEBUG(0,("add_fd_to_close_entry: tdb_store fail !\n"));
	}

	SAFE_FREE(dbuf.dptr);
	return True;
}

/****************************************************************************
 Remove all fd entries for a specific dev/inode pair from the tdb.
****************************************************************************/

static void delete_close_entries(files_struct *fsp)
{
	TDB_DATA kbuf = locking_key_fsp(fsp);

	if (tdb_delete(posix_pending_close_tdb, kbuf) == -1)
		DEBUG(0,("delete_close_entries: tdb_delete fail !\n"));
}

/****************************************************************************
 Get the array of POSIX pending close records for an open fsp. Caller must
 free. Returns number of entries.
****************************************************************************/

static size_t get_posix_pending_close_entries(files_struct *fsp, int **entries)
{
	TDB_DATA kbuf = locking_key_fsp(fsp);
	TDB_DATA dbuf;
	size_t count = 0;

	*entries = NULL;
	dbuf.dptr = NULL;

	dbuf = tdb_fetch(posix_pending_close_tdb, kbuf);

	if (!dbuf.dptr) {
		return 0;
	}

	*entries = (int *)dbuf.dptr;
	count = (size_t)(dbuf.dsize / sizeof(int));

	return count;
}

/****************************************************************************
 Get the array of POSIX locks for an fsp. Caller must free. Returns
 number of entries.
****************************************************************************/

static size_t get_posix_lock_entries(files_struct *fsp, struct posix_lock **entries)
{
	TDB_DATA kbuf = locking_key_fsp(fsp);
	TDB_DATA dbuf;
	size_t count = 0;

	*entries = NULL;

	dbuf.dptr = NULL;

	dbuf = tdb_fetch(posix_lock_tdb, kbuf);

	if (!dbuf.dptr) {
		return 0;
	}

	*entries = (struct posix_lock *)dbuf.dptr;
	count = (size_t)(dbuf.dsize / sizeof(struct posix_lock));

	return count;
}

/****************************************************************************
 Deal with pending closes needed by POSIX locking support.
 Note that posix_locking_close_file() is expected to have been called
 to delete all locks on this fsp before this function is called.
****************************************************************************/
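
/*
 * (Added summary) If POSIX locking is disabled we simply close the fd. If
 * other fds on the same dev/inode still have locks recorded in the lock tdb,
 * closing this fd now would drop the whole process's POSIX locks on the file,
 * so the fd is parked in the pending close tdb instead. Otherwise any parked
 * fds are closed along with this one.
 */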

int fd_close_posix(struct connection_struct *conn, files_struct *fsp)
{
	int saved_errno = 0;
	int ret;
	size_t count, i;
	struct posix_lock *entries = NULL;
	int *fd_array = NULL;
	BOOL locks_on_other_fds = False;

	if (!lp_posix_locking(SNUM(conn))) {
		/*
		 * No POSIX to worry about, just close.
		 */
		ret = conn->vfs_ops.close(fsp,fsp->fd);
		fsp->fd = -1;
		return ret;
	}

	/*
	 * Get the number of outstanding POSIX locks on this dev/inode pair.
	 */

	count = get_posix_lock_entries(fsp, &entries);

	/*
	 * Check if there are any outstanding locks belonging to
	 * other fd's. This should never be the case if posix_locking_close_file()
	 * has been called first, but it never hurts to be *sure*.
	 */

	for (i = 0; i < count; i++) {
		if (entries[i].fd != fsp->fd) {
			locks_on_other_fds = True;
			break;
		}
	}

	if (locks_on_other_fds) {

		/*
		 * There are outstanding locks on this dev/inode pair on other fds.
		 * Add our fd to the pending close tdb and set fsp->fd to -1.
		 */

		if (!add_fd_to_close_entry(fsp)) {
			SAFE_FREE(entries);
			return False;
		}

		SAFE_FREE(entries);
		fsp->fd = -1;
		return 0;
	}

	SAFE_FREE(entries);

	/*
	 * No outstanding POSIX locks. Get the pending close fd's
	 * from the tdb and close them all.
	 */

	count = get_posix_pending_close_entries(fsp, &fd_array);

	if (count) {
		DEBUG(10,("fd_close_posix: doing close on %u fd's.\n", (unsigned int)count ));

		for(i = 0; i < count; i++) {
			if (conn->vfs_ops.close(fsp,fd_array[i]) == -1) {
				saved_errno = errno;
			}
		}

		/*
		 * Delete all fd's stored in the tdb
		 * for this dev/inode pair.
		 */

		delete_close_entries(fsp);
	}

	SAFE_FREE(fd_array);

	/*
	 * Finally close the fd associated with this fsp.
	 */

	ret = conn->vfs_ops.close(fsp,fsp->fd);

	if (saved_errno != 0) {
		errno = saved_errno;
		ret = -1;
	}

	fsp->fd = -1;

	return ret;
}

/****************************************************************************
 Debugging aid :-).
****************************************************************************/

static const char *posix_lock_type_name(int lock_type)
{
	return (lock_type == F_RDLCK) ? "READ" : "WRITE";
}

/****************************************************************************
 Delete a POSIX lock entry by index number. Used if the tdb add succeeds, but
 then the POSIX fcntl lock fails.
****************************************************************************/

static BOOL delete_posix_lock_entry_by_index(files_struct *fsp, size_t entry)
{
	TDB_DATA kbuf = locking_key_fsp(fsp);
	TDB_DATA dbuf;
	struct posix_lock *locks;
	size_t count;

	dbuf.dptr = NULL;

	dbuf = tdb_fetch(posix_lock_tdb, kbuf);

	if (!dbuf.dptr) {
		DEBUG(10,("delete_posix_lock_entry_by_index: tdb_fetch failed !\n"));
		goto fail;
	}

	count = (size_t)(dbuf.dsize / sizeof(struct posix_lock));
	locks = (struct posix_lock *)dbuf.dptr;

	if (count == 1) {
		tdb_delete(posix_lock_tdb, kbuf);
	} else {
		if (entry < count-1) {
			memmove(&locks[entry], &locks[entry+1], sizeof(*locks)*((count-1) - entry));
		}
		dbuf.dsize -= sizeof(*locks);
		tdb_store(posix_lock_tdb, kbuf, dbuf, TDB_REPLACE);
	}

	SAFE_FREE(dbuf.dptr);

	return True;

 fail:

	SAFE_FREE(dbuf.dptr);
	return False;
}

/****************************************************************************
 Add an entry into the POSIX locking tdb. We return the index number of the
 added lock (used in case we need to delete *exactly* this entry). Returns
 False on fail, True on success.
****************************************************************************/

static BOOL add_posix_lock_entry(files_struct *fsp, SMB_OFF_T start, SMB_OFF_T size, int lock_type, size_t *pentry_num)
{
	TDB_DATA kbuf = locking_key_fsp(fsp);
	TDB_DATA dbuf;
	struct posix_lock pl;
	char *tp;

	dbuf.dptr = NULL;

	dbuf = tdb_fetch(posix_lock_tdb, kbuf);

	*pentry_num = (size_t)(dbuf.dsize / sizeof(pl));
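
	/*
	 * (Added note) The new record is appended at the end of the array, so
	 * its index is simply the number of entries already present (zero when
	 * the fetch above found no record at all).
	 */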

	/*
	 * Add new record.
	 */

	pl.fd = fsp->fd;
	pl.start = start;
	pl.size = size;
	pl.lock_type = lock_type;

	tp = Realloc(dbuf.dptr, dbuf.dsize + sizeof(pl));
	if (!tp) {
		DEBUG(0,("add_posix_lock_entry: Realloc fail !\n"));
		goto fail;
	} else
		dbuf.dptr = tp;

	memcpy(dbuf.dptr + dbuf.dsize, &pl, sizeof(pl));
	dbuf.dsize += sizeof(pl);

	if (tdb_store(posix_lock_tdb, kbuf, dbuf, TDB_REPLACE) == -1) {
		DEBUG(0,("add_posix_lock_entry: Failed to add lock entry on file %s\n", fsp->fsp_name));
		goto fail;
	}

	SAFE_FREE(dbuf.dptr);

	DEBUG(10,("add_posix_lock_entry: File %s: type = %s: start=%.0f size=%.0f: dev=%.0f inode=%.0f\n",
		fsp->fsp_name, posix_lock_type_name(lock_type), (double)start, (double)size,
		(double)fsp->dev, (double)fsp->inode ));

	return True;

 fail:

	SAFE_FREE(dbuf.dptr);
	return False;
}

/****************************************************************************
 Calculate if locks have any overlap at all.
****************************************************************************/

static BOOL does_lock_overlap(SMB_OFF_T start1, SMB_OFF_T size1, SMB_OFF_T start2, SMB_OFF_T size2)
{
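	/*
	 * (Added note) The first test below uses an inclusive upper bound, so a
	 * range that merely starts exactly where the other ends
	 * (start1 == start2 + size2) is still reported as overlapping. For
	 * example start1=4,size1=4 against start2=0,size2=4 returns True.
	 */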
	if (start1 >= start2 && start1 <= start2 + size2)
		return True;

	if (start1 < start2 && start1 + size1 > start2)
		return True;

	return False;
}

/****************************************************************************
 Delete an entry from the POSIX locking tdb. Returns a copy of the entry being
 deleted and the number of records that are overlapped by this one, or -1 on error.
****************************************************************************/

static int delete_posix_lock_entry(files_struct *fsp, SMB_OFF_T start, SMB_OFF_T size, struct posix_lock *pl)
{
	TDB_DATA kbuf = locking_key_fsp(fsp);
	TDB_DATA dbuf;
	struct posix_lock *locks;
	size_t i, count;
	BOOL found = False;
	int num_overlapping_records = 0;

	dbuf.dptr = NULL;

	dbuf = tdb_fetch(posix_lock_tdb, kbuf);

	if (!dbuf.dptr) {
		DEBUG(10,("delete_posix_lock_entry: tdb_fetch failed !\n"));
		goto fail;
	}

	/* There are existing locks - find a match. */
	locks = (struct posix_lock *)dbuf.dptr;
	count = (size_t)(dbuf.dsize / sizeof(*locks));

	/*
	 * Search for and delete the first record that matches the
	 * unlock criteria.
	 */

	for (i=0; i<count; i++) {
		struct posix_lock *entry = &locks[i];

		if (entry->fd == fsp->fd &&
			entry->start == start &&
			entry->size == size) {

			/* Make a copy if requested. */
			if (pl)
				*pl = *entry;

			/* Found it - delete it. */
			if (count == 1) {
				tdb_delete(posix_lock_tdb, kbuf);
			} else {
				if (i < count-1) {
					memmove(&locks[i], &locks[i+1], sizeof(*locks)*((count-1) - i));
				}
				dbuf.dsize -= sizeof(*locks);
				tdb_store(posix_lock_tdb, kbuf, dbuf, TDB_REPLACE);
			}
			count--;
			found = True;
			break;
		}
	}

	if (!found)
		goto fail;

	/*
	 * Count the number of entries that are
	 * overlapped by this unlock request.
	 */
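
	/*
	 * (Added note) release_posix_lock() uses this count to decide whether
	 * the fcntl lock must be downgraded to a read lock rather than released
	 * outright, so that overlapping locks recorded for other fds survive.
	 */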

	for (i = 0; i < count; i++) {
		struct posix_lock *entry = &locks[i];

		if (fsp->fd == entry->fd &&
			does_lock_overlap( start, size, entry->start, entry->size))
			num_overlapping_records++;
	}

	DEBUG(10,("delete_posix_lock_entry: type = %s: start=%.0f size=%.0f, num_records = %u\n",
		posix_lock_type_name(pl->lock_type), (double)pl->start, (double)pl->size,
		(unsigned int)num_overlapping_records ));

	SAFE_FREE(dbuf.dptr);

	return num_overlapping_records;

 fail:

	SAFE_FREE(dbuf.dptr);
	return -1;
}

/****************************************************************************
 Utility function to map a lock type correctly depending on the open
 mode of a file.
****************************************************************************/

static int map_posix_lock_type( files_struct *fsp, enum brl_type lock_type)
{
	if((lock_type == WRITE_LOCK) && !fsp->can_write) {
		/*
		 * Many UNIX's cannot get a write lock on a file opened read-only.
		 * Win32 locking semantics allow this.
		 * Do the best we can and attempt a read-only lock.
		 */
		DEBUG(10,("map_posix_lock_type: Downgrading write lock to read due to read-only file.\n"));
		return F_RDLCK;
	} else if((lock_type == READ_LOCK) && !fsp->can_read) {
		/*
		 * Ditto for read locks on write only files.
		 */
		DEBUG(10,("map_posix_lock_type: Changing read lock to write due to write-only file.\n"));
		return F_WRLCK;
	}

	/*
	 * This return should be the most normal, as we attempt
	 * to always open files read/write.
	 */

	return (lock_type == READ_LOCK) ? F_RDLCK : F_WRLCK;
}

/****************************************************************************
 Check to see if the given unsigned lock range is within the possible POSIX
 range. Modifies the given args to be in range if possible; returns False
 if not.
****************************************************************************/

static BOOL posix_lock_in_range(SMB_OFF_T *offset_out, SMB_OFF_T *count_out,
				SMB_BIG_UINT u_offset, SMB_BIG_UINT u_count)
{
	SMB_OFF_T offset = (SMB_OFF_T)u_offset;
	SMB_OFF_T count = (SMB_OFF_T)u_count;

	/*
	 * For the type of system we are, attempt to
	 * find the maximum positive lock offset as an SMB_OFF_T.
	 */

#if defined(LARGE_SMB_OFF_T) && !defined(HAVE_BROKEN_FCNTL64_LOCKS)

	/*
	 * In this case SMB_OFF_T is 64 bits,
	 * and the underlying system can handle 64 bit signed locks.
	 */

	SMB_OFF_T mask2 = ((SMB_OFF_T)0x4) << (SMB_OFF_T_BITS-4);
	SMB_OFF_T mask = (mask2<<1);
	SMB_OFF_T max_positive_lock_offset = ~mask;
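
	/*
	 * (Added note) With SMB_OFF_T_BITS == 64 this evaluates to
	 * 0x7FFFFFFFFFFFFFFF: mask2 has only bit 62 set, mask only bit 63 (the
	 * sign bit), and ~mask clears just that bit. Building the value this
	 * way presumably avoids writing a 64 bit integer literal directly.
	 */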

#else /* !LARGE_SMB_OFF_T || HAVE_BROKEN_FCNTL64_LOCKS */

	/*
	 * In this case either SMB_OFF_T is 32 bits,
	 * or the underlying system cannot handle 64 bit signed locks.
	 * All offsets & counts must be 2^31 or less.
	 */

	SMB_OFF_T max_positive_lock_offset = 0x7FFFFFFF;

#endif /* !LARGE_SMB_OFF_T || HAVE_BROKEN_FCNTL64_LOCKS */

	/*
	 * POSIX locks of length zero mean lock to end-of-file.
	 * Win32 locks of length zero are point probes. Ignore
	 * any Win32 locks of length zero. JRA.
	 */

	if (count == (SMB_OFF_T)0) {
		DEBUG(10,("posix_lock_in_range: count = 0, ignoring.\n"));
		return False;
	}

	/*
	 * If the given offset was > max_positive_lock_offset then we cannot map
	 * this at all; ignore this lock.
	 */

	if (u_offset & ~((SMB_BIG_UINT)max_positive_lock_offset)) {
		DEBUG(10,("posix_lock_in_range: (offset = %.0f) offset > %.0f and we cannot handle this. Ignoring lock.\n",
			(double)u_offset, (double)((SMB_BIG_UINT)max_positive_lock_offset) ));
		return False;
	}

	/*
	 * We must truncate the offset and count to less than max_positive_lock_offset.
	 */

	offset &= max_positive_lock_offset;
	count &= max_positive_lock_offset;

	/*
	 * Deal with a very common case of count of all ones.
	 * (lock entire file).
	 */

	if(count == (SMB_OFF_T)-1)
		count = max_positive_lock_offset;

	/*
	 * Truncate count to end at max lock offset.
	 */

	if (offset + count < 0 || offset + count > max_positive_lock_offset)
		count = max_positive_lock_offset - offset;

	/*
	 * If we ate all the count, ignore this lock.
	 */

	if (count == 0) {
		DEBUG(10,("posix_lock_in_range: Count = 0. Ignoring lock u_offset = %.0f, u_count = %.0f\n",
			(double)u_offset, (double)u_count ));
		return False;
	}

	/*
	 * The mapping was successful.
	 */

	DEBUG(10,("posix_lock_in_range: offset_out = %.0f, count_out = %.0f\n",
		(double)offset, (double)count ));

	*offset_out = offset;
	*count_out = count;

	return True;
}

/****************************************************************************
 Actual function that does POSIX locks. Copes with 64 -> 32 bit cruft and
 broken NFS implementations.
****************************************************************************/

static BOOL posix_fcntl_lock(files_struct *fsp, int op, SMB_OFF_T offset, SMB_OFF_T count, int type)
{
	int ret;
	struct connection_struct *conn = fsp->conn;

	DEBUG(8,("posix_fcntl_lock %d %d %.0f %.0f %d\n",fsp->fd,op,(double)offset,(double)count,type));

	ret = conn->vfs_ops.lock(fsp,fsp->fd,op,offset,count,type);

	if (!ret && ((errno == EFBIG) || (errno == ENOLCK) || (errno == EINVAL))) {

		DEBUG(0,("posix_fcntl_lock: WARNING: lock request at offset %.0f, length %.0f returned\n",
			(double)offset,(double)count));
		DEBUG(0,("an %s error. This can happen when using 64 bit lock offsets\n", strerror(errno)));
		DEBUG(0,("on 32 bit NFS mounted file systems.\n"));

		/*
		 * If the offset is > 0x7FFFFFFF then this will cause problems on
		 * 32 bit NFS mounted filesystems. Just ignore it.
		 */

		if (offset & ~((SMB_OFF_T)0x7fffffff)) {
			DEBUG(0,("Offset greater than 31 bits. Returning success.\n"));
			return True;
		}

		if (count & ~((SMB_OFF_T)0x7fffffff)) {
			/* 32 bit NFS file system, retry with smaller offset */
			DEBUG(0,("Count greater than 31 bits - retrying with 31 bit truncated length.\n"));
			errno = 0;
			count &= 0x7fffffff;
			ret = conn->vfs_ops.lock(fsp,fsp->fd,op,offset,count,type);
		}
	}

	DEBUG(8,("posix_fcntl_lock: Lock call %s\n", ret ? "successful" : "failed"));

	return ret;
}

/****************************************************************************
 POSIX function to see if a file region is locked. Returns True if the
 region is locked, False otherwise.
****************************************************************************/

BOOL is_posix_locked(files_struct *fsp, SMB_BIG_UINT u_offset, SMB_BIG_UINT u_count, enum brl_type lock_type)
{
	SMB_OFF_T offset;
	SMB_OFF_T count;
	int posix_lock_type = map_posix_lock_type(fsp,lock_type);

	DEBUG(10,("is_posix_locked: File %s, offset = %.0f, count = %.0f, type = %s\n",
		fsp->fsp_name, (double)u_offset, (double)u_count, posix_lock_type_name(lock_type) ));

	/*
	 * If the requested lock won't fit in the POSIX range, we will
	 * never set it, so presume it is not locked.
	 */

	if(!posix_lock_in_range(&offset, &count, u_offset, u_count))
		return False;

	/*
	 * Note that most UNIX's can *test* for a write lock on
	 * a read-only fd, just not *set* a write lock on a read-only
	 * fd. So we don't need to use map_lock_type here.
	 */

	return posix_fcntl_lock(fsp,SMB_F_GETLK,offset,count,posix_lock_type);
}

/*
 * Structure used when splitting a lock range
 * into a POSIX lock range. Doubly linked list.
 */

struct lock_list {
	struct lock_list *next;
	struct lock_list *prev;
	SMB_OFF_T start;
	SMB_OFF_T size;
};

/****************************************************************************
 Given a list seeded with the requested lock range, trim or split out any
 parts that overlap POSIX locks already recorded in the tdb for this
 dev/inode pair, leaving only the sub-ranges that still need a real fcntl
 lock or unlock. Used in calculating POSIX locks and unlocks. This is a
 difficult function that requires ASCII art to understand it :-).
****************************************************************************/
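
/*
 * (Added example) A request of start=0,size=10 checked against one existing
 * lock of start=3,size=3 comes back as two ranges, start=0,size=3 and
 * start=6,size=4 (the "split" case below).
 */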

static struct lock_list *posix_lock_list(TALLOC_CTX *ctx, struct lock_list *lhead, files_struct *fsp)
{
	TDB_DATA kbuf = locking_key_fsp(fsp);
	TDB_DATA dbuf;
	struct posix_lock *locks;
	size_t num_locks, i;

	dbuf.dptr = NULL;

	dbuf = tdb_fetch(posix_lock_tdb, kbuf);

	if (!dbuf.dptr)
		return lhead;

	locks = (struct posix_lock *)dbuf.dptr;
	num_locks = (size_t)(dbuf.dsize / sizeof(*locks));

	/*
	 * Check the current lock list on this dev/inode pair.
	 * Quit if the list is deleted.
	 */

	DEBUG(10,("posix_lock_list: curr: start=%.0f,size=%.0f\n",
		(double)lhead->start, (double)lhead->size ));

	for (i=0; i<num_locks && lhead; i++) {

		struct posix_lock *lock = &locks[i];
		struct lock_list *l_curr;

		/*
		 * Walk the lock list, checking for overlaps. Note that
		 * the lock list can expand within this loop if the current
		 * range being examined needs to be split.
		 */

		for (l_curr = lhead; l_curr;) {

			DEBUG(10,("posix_lock_list: lock: fd=%d: start=%.0f,size=%.0f:type=%s", lock->fd,
				(double)lock->start, (double)lock->size, posix_lock_type_name(lock->lock_type) ));

			if ( (l_curr->start >= (lock->start + lock->size)) ||
				 (lock->start >= (l_curr->start + l_curr->size))) {

				/* No overlap with this lock - leave this range alone. */
/*********************************************
                                             +---------+
                                             | l_curr  |
                                             +---------+
                                +-------+
                                | lock  |
                                +-------+
OR....
             +---------+
             |  l_curr |
             +---------+
**********************************************/

				DEBUG(10,("no overlap case.\n" ));

				l_curr = l_curr->next;

			} else if ( (l_curr->start >= lock->start) &&
						(l_curr->start + l_curr->size <= lock->start + lock->size) ) {

				/*
				 * This unlock is completely overlapped by this existing lock range
				 * and thus should have no effect (not be unlocked). Delete it from the list.
				 */
/*********************************************
                +---------+
                | l_curr  |
                +---------+
        +---------------------------+
        |       lock                |
        +---------------------------+
**********************************************/
				/* Save the next pointer */
				struct lock_list *ul_next = l_curr->next;

				DEBUG(10,("delete case.\n" ));

				DLIST_REMOVE(lhead, l_curr);
				if(lhead == NULL)
					break; /* No more list... */

				l_curr = ul_next;

			} else if ( (l_curr->start >= lock->start) &&
						(l_curr->start < lock->start + lock->size) &&
						(l_curr->start + l_curr->size > lock->start + lock->size) ) {

				/*
				 * This unlock overlaps the existing lock range at the high end.
				 * Truncate by moving start to existing range end and reducing size.
				 */
/*********************************************
                +---------------+
                |   l_curr      |
                +---------------+
        +---------------+
        |    lock       |
        +---------------+
BECOMES....
                        +-------+
                        | l_curr|
                        +-------+
**********************************************/

				l_curr->size = (l_curr->start + l_curr->size) - (lock->start + lock->size);
				l_curr->start = lock->start + lock->size;

				DEBUG(10,("truncate high case: start=%.0f,size=%.0f\n",
					(double)l_curr->start, (double)l_curr->size ));

				l_curr = l_curr->next;

			} else if ( (l_curr->start < lock->start) &&
						(l_curr->start + l_curr->size > lock->start) &&
						(l_curr->start + l_curr->size <= lock->start + lock->size) ) {

				/*
				 * This unlock overlaps the existing lock range at the low end.
				 * Truncate by reducing size.
				 */
/*********************************************
   +---------------+
   |   l_curr      |
   +---------------+
           +---------------+
           |    lock       |
           +---------------+
BECOMES....
   +-------+
   | l_curr|
   +-------+
**********************************************/

				l_curr->size = lock->start - l_curr->start;

				DEBUG(10,("truncate low case: start=%.0f,size=%.0f\n",
					(double)l_curr->start, (double)l_curr->size ));

				l_curr = l_curr->next;

			} else if ( (l_curr->start < lock->start) &&
						(l_curr->start + l_curr->size > lock->start + lock->size) ) {
				/*
				 * Worst case scenario. Unlock request completely overlaps an existing
				 * lock range. Split the request into two, push the new (upper) request
				 * into the dlink list, and continue with the entry after l_new (as we
				 * know that l_new will not overlap with this lock).
				 */
/*********************************************
        +---------------------------+
        |        l_curr             |
        +---------------------------+
                +---------+
                |  lock   |
                +---------+
BECOMES.....
        +-------+         +---------+
        | l_curr|         |  l_new  |
        +-------+         +---------+
**********************************************/
				struct lock_list *l_new = (struct lock_list *)talloc(ctx,
											sizeof(struct lock_list));

				if(l_new == NULL) {
					DEBUG(0,("posix_lock_list: talloc fail.\n"));
					return NULL; /* The talloc_destroy takes care of cleanup. */
				}

				ZERO_STRUCTP(l_new);
				l_new->start = lock->start + lock->size;
				l_new->size = l_curr->start + l_curr->size - l_new->start;

				/* Truncate the l_curr. */
				l_curr->size = lock->start - l_curr->start;

				DEBUG(10,("split case: curr: start=%.0f,size=%.0f \
new: start=%.0f,size=%.0f\n", (double)l_curr->start, (double)l_curr->size,
					(double)l_new->start, (double)l_new->size ));

				/*
				 * Add into the dlink list after the l_curr point - NOT at lhead.
				 * Note we can't use DLINK_ADD here as this inserts at the head of the given list.
				 */

				l_new->prev = l_curr;
				l_new->next = l_curr->next;
				l_curr->next = l_new;

				/* And move after the link we added. */
				l_curr = l_new->next;

			} else {

				/*
				 * This logic case should never happen. Ensure this is the
				 * case by forcing an abort.... Remove in production.
				 */
				pstring msg;

				slprintf(msg, sizeof(msg)-1, "logic flaw in cases: l_curr: start = %.0f, size = %.0f : \
lock: start = %.0f, size = %.0f\n", (double)l_curr->start, (double)l_curr->size, (double)lock->start, (double)lock->size );

				smb_panic(msg);
			}
		} /* end for ( l_curr = lhead; l_curr;) */
	} /* end for (i=0; i<num_locks && lhead; i++) */

	SAFE_FREE(dbuf.dptr);

	return lhead;
}

/****************************************************************************
 POSIX function to acquire a lock. Returns True if the
 lock could be granted, False if not.
****************************************************************************/

BOOL set_posix_lock(files_struct *fsp, SMB_BIG_UINT u_offset, SMB_BIG_UINT u_count, enum brl_type lock_type)
{
	SMB_OFF_T offset;
	SMB_OFF_T count;
	BOOL ret = True;
	size_t entry_num = 0;
	size_t lock_count;
	TALLOC_CTX *l_ctx = NULL;
	struct lock_list *llist = NULL;
	struct lock_list *ll = NULL;
	int posix_lock_type = map_posix_lock_type(fsp,lock_type);

	DEBUG(5,("set_posix_lock: File %s, offset = %.0f, count = %.0f, type = %s\n",
		fsp->fsp_name, (double)u_offset, (double)u_count, posix_lock_type_name(lock_type) ));

	/*
	 * If the requested lock won't fit in the POSIX range, we will
	 * pretend it was successful.
	 */

	if(!posix_lock_in_range(&offset, &count, u_offset, u_count))
		return True;

	/*
	 * Windows is very strange. It allows read locks to be overlaid
	 * (even over a write lock), but leaves the write lock in force until the first
	 * unlock. It also reference counts the locks. This means the following sequence :
	 *
	 * process1                                      process2
	 * ------------------------------------------------------------------------
	 * WRITE LOCK : start = 2, len = 10
	 *                                               READ LOCK: start = 0, len = 10 - FAIL
	 * READ LOCK : start = 0, len = 14
	 *                                               READ LOCK: start = 0, len = 10 - FAIL
	 * UNLOCK : start = 2, len = 10
	 *                                               READ LOCK: start = 0, len = 10 - OK
	 *
	 * Under POSIX, the same sequence in steps 1 and 2 would not be reference counted, but
	 * would leave a single read lock over the 0-14 region. In order to
	 * re-create Windows semantics mapped to POSIX locks, we create multiple TDB
	 * entries, one for each overlaid lock request. We are guaranteed by the brlock
	 * semantics that if a write lock is added, then it will be first in the array.
	 */
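
	/*
	 * (Added note) The tdb entry added below records this request even when
	 * no new fcntl lock ends up being needed, so that a later unlock can ask
	 * delete_posix_lock_entry() how many overlapping requests are still
	 * outstanding before it really releases or downgrades the POSIX lock.
	 */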

	if ((l_ctx = talloc_init()) == NULL) {
		DEBUG(0,("set_posix_lock: unable to init talloc context.\n"));
		return True; /* Not a fatal error. */
	}

	if ((ll = (struct lock_list *)talloc(l_ctx, sizeof(struct lock_list))) == NULL) {
		DEBUG(0,("set_posix_lock: unable to talloc unlock list.\n"));
		talloc_destroy(l_ctx);
		return True; /* Not a fatal error. */
	}

	/*
	 * Create the initial list entry containing the
	 * lock we want to add.
	 */

	ZERO_STRUCTP(ll);
	ll->start = offset;
	ll->size = count;

	DLIST_ADD(llist, ll);

	/*
	 * The following call calculates if there are any
	 * overlapping locks held by this process on
	 * fd's open on the same file and splits this list
	 * into a list of lock ranges that do not overlap with existing
	 * POSIX locks.
	 */

	llist = posix_lock_list(l_ctx, llist, fsp);

	/*
	 * Now we have the list of ranges to lock it is safe to add the
	 * entry into the POSIX lock tdb. We take note of the entry we
	 * added here in case we have to remove it on POSIX lock fail.
	 */

	if (!add_posix_lock_entry(fsp,offset,count,posix_lock_type,&entry_num)) {
		DEBUG(0,("set_posix_lock: Unable to create posix lock entry !\n"));
		talloc_destroy(l_ctx);
		return False;
	}

	/*
	 * Add the POSIX locks on the list of ranges returned.
	 * As the lock is supposed to be added atomically, we need to
	 * back out all the locks if any one of these calls fail.
	 */

	for (lock_count = 0, ll = llist; ll; ll = ll->next, lock_count++) {
		offset = ll->start;
		count = ll->size;

		DEBUG(5,("set_posix_lock: Real lock: Type = %s: offset = %.0f, count = %.0f\n",
			posix_lock_type_name(posix_lock_type), (double)offset, (double)count ));

		if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,posix_lock_type)) {
			DEBUG(5,("set_posix_lock: Lock fail !: Type = %s: offset = %.0f, count = %.0f. Errno = %s\n",
				posix_lock_type_name(posix_lock_type), (double)offset, (double)count, strerror(errno) ));
			ret = False;
			break;
		}
	}

	if (!ret) {

		/*
		 * Back out all the POSIX locks we have on fail.
		 */

		for (ll = llist; lock_count; ll = ll->next, lock_count--) {
			offset = ll->start;
			count = ll->size;

			DEBUG(5,("set_posix_lock: Backing out locks: Type = %s: offset = %.0f, count = %.0f\n",
				posix_lock_type_name(posix_lock_type), (double)offset, (double)count ));

			posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_UNLCK);
		}

		/*
		 * Remove the tdb entry for this lock.
		 */

		delete_posix_lock_entry_by_index(fsp,entry_num);
	}

	talloc_destroy(l_ctx);
	return ret;
}

/****************************************************************************
 POSIX function to release a lock. Returns True if the
 lock could be released, False if not.
****************************************************************************/

BOOL release_posix_lock(files_struct *fsp, SMB_BIG_UINT u_offset, SMB_BIG_UINT u_count)
{
	SMB_OFF_T offset;
	SMB_OFF_T count;
	BOOL ret = True;
	TALLOC_CTX *ul_ctx = NULL;
	struct lock_list *ulist = NULL;
	struct lock_list *ul = NULL;
	struct posix_lock deleted_lock;
	int num_overlapped_entries;

	DEBUG(5,("release_posix_lock: File %s, offset = %.0f, count = %.0f\n",
		fsp->fsp_name, (double)u_offset, (double)u_count ));

	/*
	 * If the requested lock won't fit in the POSIX range, we will
	 * pretend it was successful.
	 */

	if(!posix_lock_in_range(&offset, &count, u_offset, u_count))
		return True;

	/*
	 * We treat this as one unlock request for POSIX accounting purposes even
	 * if it may later be split into multiple smaller POSIX unlock ranges.
	 * num_overlapped_entries is the number of existing locks that have any
	 * overlap with this unlock request.
	 */

	num_overlapped_entries = delete_posix_lock_entry(fsp, offset, count, &deleted_lock);

	if (num_overlapped_entries == -1) {
		smb_panic("release_posix_lock: unable to find entry to delete !\n");
	}

	/*
	 * If num_overlapped_entries is > 0, and the lock_type we just deleted from the tdb was
	 * a POSIX write lock, then before doing the unlock we need to downgrade
	 * the POSIX lock to a read lock. This allows any overlapping read locks
	 * to be atomically maintained.
	 */

	if (num_overlapped_entries > 0 && deleted_lock.lock_type == F_WRLCK) {
		if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_RDLCK)) {
			DEBUG(0,("release_posix_lock: downgrade of lock failed with error %s !\n", strerror(errno) ));
			return False;
		}
	}

	if ((ul_ctx = talloc_init()) == NULL) {
		DEBUG(0,("release_posix_lock: unable to init talloc context.\n"));
		return True; /* Not a fatal error. */
	}

	if ((ul = (struct lock_list *)talloc(ul_ctx, sizeof(struct lock_list))) == NULL) {
		DEBUG(0,("release_posix_lock: unable to talloc unlock list.\n"));
		talloc_destroy(ul_ctx);
		return True; /* Not a fatal error. */
	}

	/*
	 * Create the initial list entry containing the
	 * lock we want to remove.
	 */

	ZERO_STRUCTP(ul);
	ul->start = offset;
	ul->size = count;

	DLIST_ADD(ulist, ul);

	/*
	 * The following call calculates if there are any
	 * overlapping locks held by this process on
	 * fd's open on the same file and creates a
	 * list of unlock ranges that will allow
	 * POSIX lock ranges to remain on the file whilst the
	 * unlocks are performed.
	 */

	ulist = posix_lock_list(ul_ctx, ulist, fsp);

	/*
	 * Release the POSIX locks on the list of ranges returned.
	 */

	for(; ulist; ulist = ulist->next) {
		offset = ulist->start;
		count = ulist->size;

		DEBUG(5,("release_posix_lock: Real unlock: offset = %.0f, count = %.0f\n",
			(double)offset, (double)count ));

		if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_UNLCK))
			ret = False;
	}

	talloc_destroy(ul_ctx);

	return ret;
}

/****************************************************************************
 Remove all lock entries for a specific dev/inode pair from the tdb.
****************************************************************************/

static void delete_posix_lock_entries(files_struct *fsp)
{
	TDB_DATA kbuf = locking_key_fsp(fsp);

	if (tdb_delete(posix_lock_tdb, kbuf) == -1)
		DEBUG(0,("delete_posix_lock_entries: tdb_delete fail !\n"));
}

/****************************************************************************
 Debug function.
****************************************************************************/

static void dump_entry(struct posix_lock *pl)
{
	DEBUG(10,("entry: start=%.0f, size=%.0f, type=%d, fd=%i\n",
		(double)pl->start, (double)pl->size, (int)pl->lock_type, pl->fd ));
}

/****************************************************************************
 Remove any locks on this fd. Called from file_close().
****************************************************************************/

void posix_locking_close_file(files_struct *fsp)
{
	struct posix_lock *entries = NULL;
	size_t count, i;

	/*
	 * Optimization for the common case where we are the only
	 * opener of a file. If all fd entries are our own, we don't
	 * need to explicitly release all the locks via the POSIX functions,
	 * we can just remove all the entries in the tdb and allow the
	 * close to remove the real locks.
	 */

	count = get_posix_lock_entries(fsp, &entries);

	if (count == 0) {
		DEBUG(10,("posix_locking_close_file: file %s has no outstanding locks.\n", fsp->fsp_name ));
		return;
	}

	for (i = 0; i < count; i++) {
		if (entries[i].fd != fsp->fd )
			break;

		dump_entry(&entries[i]);
	}

	if (i == count) {
		/* All locks are ours. */
		DEBUG(10,("posix_locking_close_file: file %s has %u outstanding locks, but all on one fd.\n",
			fsp->fsp_name, (unsigned int)count ));
		SAFE_FREE(entries);
		delete_posix_lock_entries(fsp);
		return;
	}

	/*
	 * Difficult case. We need to delete all our locks, whilst leaving
	 * all other POSIX locks in place.
	 */

	for (i = 0; i < count; i++) {
		struct posix_lock *pl = &entries[i];
		if (pl->fd == fsp->fd)
			release_posix_lock(fsp, (SMB_BIG_UINT)pl->start, (SMB_BIG_UINT)pl->size );
	}
	SAFE_FREE(entries);
}

/*******************************************************************
 Create the in-memory POSIX lock databases.
********************************************************************/

BOOL posix_locking_init(int read_only)
{
	if (posix_lock_tdb && posix_pending_close_tdb)
		return True;

	if (!posix_lock_tdb)
		posix_lock_tdb = tdb_open_log(NULL, 0, TDB_INTERNAL,
			read_only?O_RDONLY:(O_RDWR|O_CREAT), 0644);
	if (!posix_lock_tdb) {
		DEBUG(0,("Failed to open POSIX byte range locking database.\n"));
		return False;
	}
	if (!posix_pending_close_tdb)
		posix_pending_close_tdb = tdb_open_log(NULL, 0, TDB_INTERNAL,
			read_only?O_RDONLY:(O_RDWR|O_CREAT), 0644);
	if (!posix_pending_close_tdb) {
		DEBUG(0,("Failed to open POSIX pending close database.\n"));
		return False;
	}

	return True;
}

/*******************************************************************
 Delete the in-memory POSIX lock databases.
********************************************************************/

BOOL posix_locking_end(void)
{
	if (posix_lock_tdb && tdb_close(posix_lock_tdb) != 0)
		return False;
	if (posix_pending_close_tdb && tdb_close(posix_pending_close_tdb) != 0)
		return False;
	return True;
}