Tidyup fixes for fcntl spin problem.
[Samba/gbeck.git] / source / locking / posix.c
blob2a6f2be562516a1d0d5ccb9751088cefc1a432ad
1 /*
2 Unix SMB/Netbios implementation.
3 Version 3.0
4 Locking functions
5 Copyright (C) Jeremy Allison 1992-2000
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
21 Revision History:
23 POSIX locking support. Jeremy Allison (jeremy@valinux.com), Apr. 2000.
26 #include "includes.h"
27 extern int DEBUGLEVEL;
30 * The POSIX locking database handle.
33 static TDB_CONTEXT *posix_lock_tdb;
36 * The pending close database handle.
39 static TDB_CONTEXT *posix_pending_close_tdb;
42 * The data in POSIX lock records is an unsorted linear array of these
43 * records. It is unnecessary to store the count as tdb provides the
44 * size of the record.
47 struct posix_lock {
48 int fd;
49 SMB_OFF_T start;
50 SMB_OFF_T size;
51 int lock_type;
55 * The data in POSIX pending close records is an unsorted linear array of int
56 * records. It is unnecessary to store the count as tdb provides the
57 * size of the record.
60 /* The key used in both the POSIX databases. */
62 struct posix_lock_key {
63 SMB_DEV_T device;
64 SMB_INO_T inode;
65 };
67 /*******************************************************************
68 Form a static locking key for a dev/inode pair.
69 ******************************************************************/
71 static TDB_DATA locking_key(SMB_DEV_T dev, SMB_INO_T inode)
73 static struct posix_lock_key key;
74 TDB_DATA kbuf;
76 memset(&key, '\0', sizeof(key));
77 key.device = dev;
78 key.inode = inode;
79 kbuf.dptr = (char *)&key;
80 kbuf.dsize = sizeof(key);
81 return kbuf;
84 /*******************************************************************
85 Convenience function to get a key from an fsp.
86 ******************************************************************/
88 static TDB_DATA locking_key_fsp(files_struct *fsp)
90 return locking_key(fsp->dev, fsp->inode);
93 /****************************************************************************
94 Add an fd to the pending close tdb.
95 ****************************************************************************/
97 static BOOL add_fd_to_close_entry(files_struct *fsp)
99 TDB_DATA kbuf = locking_key_fsp(fsp);
100 TDB_DATA dbuf;
101 char *tp;
103 dbuf.dptr = NULL;
105 dbuf = tdb_fetch(posix_pending_close_tdb, kbuf);
107 tp = Realloc(dbuf.dptr, dbuf.dsize + sizeof(int));
108 if (!tp) {
109 DEBUG(0,("add_fd_to_close_entry: Realloc fail !\n"));
110 if (dbuf.dptr) free(dbuf.dptr);
111 return False;
113 else dbuf.dptr = tp;
114 memcpy(dbuf.dptr + dbuf.dsize, &fsp->fd, sizeof(int));
115 dbuf.dsize += sizeof(int);
117 if (tdb_store(posix_pending_close_tdb, kbuf, dbuf, TDB_REPLACE) == -1) {
118 DEBUG(0,("add_fd_to_close_entry: tdb_store fail !\n"));
121 free(dbuf.dptr);
122 return True;
125 /****************************************************************************
126 Remove all fd entries for a specific dev/inode pair from the tdb.
127 ****************************************************************************/
129 static void delete_close_entries(files_struct *fsp)
131 TDB_DATA kbuf = locking_key_fsp(fsp);
133 if (tdb_delete(posix_pending_close_tdb, kbuf) == -1)
134 DEBUG(0,("delete_close_entries: tdb_delete fail !\n"));
137 /****************************************************************************
138 Get the array of POSIX pending close records for an open fsp. Caller must
139 free. Returns number of entries.
140 ****************************************************************************/
142 static size_t get_posix_pending_close_entries(files_struct *fsp, int **entries)
144 TDB_DATA kbuf = locking_key_fsp(fsp);
145 TDB_DATA dbuf;
146 size_t count = 0;
148 *entries = NULL;
149 dbuf.dptr = NULL;
151 dbuf = tdb_fetch(posix_pending_close_tdb, kbuf);
153 if (!dbuf.dptr) {
154 return 0;
157 *entries = (int *)dbuf.dptr;
158 count = (size_t)(dbuf.dsize / sizeof(int));
160 return count;
163 /****************************************************************************
164 Get the array of POSIX locks for an fsp. Caller must free. Returns
165 number of entries.
166 ****************************************************************************/
168 static size_t get_posix_lock_entries(files_struct *fsp, struct posix_lock **entries)
170 TDB_DATA kbuf = locking_key_fsp(fsp);
171 TDB_DATA dbuf;
172 size_t count = 0;
174 *entries = NULL;
176 dbuf.dptr = NULL;
178 dbuf = tdb_fetch(posix_lock_tdb, kbuf);
180 if (!dbuf.dptr) {
181 return 0;
184 *entries = (struct posix_lock *)dbuf.dptr;
185 count = (size_t)(dbuf.dsize / sizeof(struct posix_lock));
187 return count;
190 /****************************************************************************
191 Deal with pending closes needed by POSIX locking support.
192 Note that posix_locking_close_file() is expected to have been called
193 to delete all locks on this fsp before this function is called.
194 ****************************************************************************/
196 int fd_close_posix(struct connection_struct *conn, files_struct *fsp)
198 int saved_errno = 0;
199 int ret;
200 size_t count, i;
201 struct posix_lock *entries = NULL;
202 int *fd_array = NULL;
203 BOOL locks_on_other_fds = False;
205 if (!lp_posix_locking(SNUM(conn))) {
207 * No POSIX to worry about, just close.
209 ret = conn->vfs_ops.close(fsp,fsp->fd);
210 fsp->fd = -1;
211 return ret;
215 * Get the number of outstanding POSIX locks on this dev/inode pair.
218 count = get_posix_lock_entries(fsp, &entries);
221 * Check if there are any outstanding locks belonging to
222 * other fd's. This should never be the case if posix_locking_close_file()
223 * has been called first, but it never hurts to be *sure*.
226 for (i = 0; i < count; i++) {
227 if (entries[i].fd != fsp->fd) {
228 locks_on_other_fds = True;
229 break;
233 if (locks_on_other_fds) {
236 * There are outstanding locks on this dev/inode pair on other fds.
237 * Add our fd to the pending close tdb and set fsp->fd to -1.
240 if (!add_fd_to_close_entry(fsp)) {
241 free((char *)entries);
242 return False;
245 free((char *)entries);
246 fsp->fd = -1;
247 return 0;
250 if(entries)
251 free((char *)entries);
254 * No outstanding POSIX locks. Get the pending close fd's
255 * from the tdb and close them all.
258 count = get_posix_pending_close_entries(fsp, &fd_array);
260 if (count) {
261 DEBUG(10,("fd_close_posix: doing close on %u fd's.\n", (unsigned int)count ));
263 for(i = 0; i < count; i++) {
264 if (conn->vfs_ops.close(fsp,fd_array[i]) == -1) {
265 saved_errno = errno;
270 * Delete all fd's stored in the tdb
271 * for this dev/inode pair.
274 delete_close_entries(fsp);
277 if (fd_array)
278 free((char *)fd_array);
281 * Finally close the fd associated with this fsp.
284 ret = conn->vfs_ops.close(fsp,fsp->fd);
286 if (saved_errno != 0) {
287 errno = saved_errno;
288 ret = -1;
291 fsp->fd = -1;
293 return ret;
/****************************************************************************
 Debugging aid: printable name for a POSIX lock type. F_RDLCK maps to
 "READ"; every other value maps to "WRITE".
****************************************************************************/

static const char *posix_lock_type_name(int lock_type)
{
	if (lock_type == F_RDLCK) {
		return "READ";
	}

	return "WRITE";
}
305 /****************************************************************************
306 Delete a POSIX lock entry by index number. Used if the tdb add succeeds, but
307 then the POSIX fcntl lock fails.
308 ****************************************************************************/
310 static BOOL delete_posix_lock_entry_by_index(files_struct *fsp, size_t entry)
312 TDB_DATA kbuf = locking_key_fsp(fsp);
313 TDB_DATA dbuf;
314 struct posix_lock *locks;
315 size_t count;
317 dbuf.dptr = NULL;
319 dbuf = tdb_fetch(posix_lock_tdb, kbuf);
321 if (!dbuf.dptr) {
322 DEBUG(10,("delete_posix_lock_entry_by_index: tdb_fetch failed !\n"));
323 goto fail;
326 count = (size_t)(dbuf.dsize / sizeof(struct posix_lock));
327 locks = (struct posix_lock *)dbuf.dptr;
329 if (count == 1) {
330 tdb_delete(posix_lock_tdb, kbuf);
331 } else {
332 if (entry < count-1) {
333 memmove(&locks[entry], &locks[entry+1], sizeof(*locks)*((count-1) - entry));
335 dbuf.dsize -= sizeof(*locks);
336 tdb_store(posix_lock_tdb, kbuf, dbuf, TDB_REPLACE);
339 free(dbuf.dptr);
341 return True;
343 fail:
344 if (dbuf.dptr)
345 free(dbuf.dptr);
346 return False;
349 /****************************************************************************
350 Add an entry into the POSIX locking tdb. We return the index number of the
351 added lock (used in case we need to delete *exactly* this entry). Returns
352 False on fail, True on success.
353 ****************************************************************************/
355 static BOOL add_posix_lock_entry(files_struct *fsp, SMB_OFF_T start, SMB_OFF_T size, int lock_type, size_t *pentry_num)
357 TDB_DATA kbuf = locking_key_fsp(fsp);
358 TDB_DATA dbuf;
359 struct posix_lock pl;
360 char *tp;
362 dbuf.dptr = NULL;
364 dbuf = tdb_fetch(posix_lock_tdb, kbuf);
366 *pentry_num = (size_t)(dbuf.dsize / sizeof(pl));
369 * Add new record.
372 pl.fd = fsp->fd;
373 pl.start = start;
374 pl.size = size;
375 pl.lock_type = lock_type;
377 tp = Realloc(dbuf.dptr, dbuf.dsize + sizeof(pl));
378 if (!tp) {
379 DEBUG(0,("add_posix_lock_entry: Realloc fail !\n"));
380 goto fail;
382 else dbuf.dptr = tp;
384 memcpy(dbuf.dptr + dbuf.dsize, &pl, sizeof(pl));
385 dbuf.dsize += sizeof(pl);
387 if (tdb_store(posix_lock_tdb, kbuf, dbuf, TDB_REPLACE) == -1) {
388 DEBUG(0,("add_posix_lock: Failed to add lock entry on file %s\n", fsp->fsp_name));
389 goto fail;
392 free(dbuf.dptr);
394 DEBUG(10,("add_posix_lock: File %s: type = %s: start=%.0f size=%.0f: dev=%.0f inode=%.0f\n",
395 fsp->fsp_name, posix_lock_type_name(lock_type), (double)start, (double)size,
396 (double)fsp->dev, (double)fsp->inode ));
398 return True;
400 fail:
401 if (dbuf.dptr)
402 free(dbuf.dptr);
403 return False;
406 /****************************************************************************
407 Calculate if locks have any overlap at all.
408 ****************************************************************************/
410 static BOOL does_lock_overlap(SMB_OFF_T start1, SMB_OFF_T size1, SMB_OFF_T start2, SMB_OFF_T size2)
412 if (start1 >= start2 && start1 <= start2 + size2)
413 return True;
415 if (start1 < start2 && start1 + size1 > start2)
416 return True;
418 return False;
421 /****************************************************************************
422 Delete an entry from the POSIX locking tdb. Returns a copy of the entry being
423 deleted and the number of records that are overlapped by this one, or -1 on error.
424 ****************************************************************************/
426 static int delete_posix_lock_entry(files_struct *fsp, SMB_OFF_T start, SMB_OFF_T size, struct posix_lock *pl)
428 TDB_DATA kbuf = locking_key_fsp(fsp);
429 TDB_DATA dbuf;
430 struct posix_lock *locks;
431 size_t i, count;
432 BOOL found = False;
433 int num_overlapping_records = 0;
435 dbuf.dptr = NULL;
437 dbuf = tdb_fetch(posix_lock_tdb, kbuf);
439 if (!dbuf.dptr) {
440 DEBUG(10,("delete_posix_lock_entry: tdb_fetch failed !\n"));
441 goto fail;
444 /* There are existing locks - find a match. */
445 locks = (struct posix_lock *)dbuf.dptr;
446 count = (size_t)(dbuf.dsize / sizeof(*locks));
449 * Search for and delete the first record that matches the
450 * unlock criteria.
453 for (i=0; i<count; i++) {
454 struct posix_lock *entry = &locks[i];
456 if (entry->fd == fsp->fd &&
457 entry->start == start &&
458 entry->size == size) {
460 /* Make a copy if requested. */
461 if (pl)
462 *pl = *entry;
464 /* Found it - delete it. */
465 if (count == 1) {
466 tdb_delete(posix_lock_tdb, kbuf);
467 } else {
468 if (i < count-1) {
469 memmove(&locks[i], &locks[i+1], sizeof(*locks)*((count-1) - i));
471 dbuf.dsize -= sizeof(*locks);
472 tdb_store(posix_lock_tdb, kbuf, dbuf, TDB_REPLACE);
474 count--;
475 found = True;
476 break;
480 if (!found)
481 goto fail;
484 * Count the number of entries that are
485 * overlapped by this unlock request.
488 for (i = 0; i < count; i++) {
489 struct posix_lock *entry = &locks[i];
491 if (fsp->fd == entry->fd &&
492 does_lock_overlap( start, size, entry->start, entry->size))
493 num_overlapping_records++;
496 DEBUG(10,("delete_posix_lock_entry: type = %s: start=%.0f size=%.0f, num_records = %d\n",
497 posix_lock_type_name(pl->lock_type), (double)pl->start, (double)pl->size,
498 (unsigned int)num_overlapping_records ));
500 if (dbuf.dptr)
501 free(dbuf.dptr);
503 return num_overlapping_records;
505 fail:
506 if (dbuf.dptr)
507 free(dbuf.dptr);
508 return -1;
511 /****************************************************************************
512 Utility function to map a lock type correctly depending on the open
513 mode of a file.
514 ****************************************************************************/
516 static int map_posix_lock_type( files_struct *fsp, enum brl_type lock_type)
518 if((lock_type == WRITE_LOCK) && !fsp->can_write) {
520 * Many UNIX's cannot get a write lock on a file opened read-only.
521 * Win32 locking semantics allow this.
522 * Do the best we can and attempt a read-only lock.
524 DEBUG(10,("map_posix_lock_type: Downgrading write lock to read due to read-only file.\n"));
525 return F_RDLCK;
526 } else if((lock_type == READ_LOCK) && !fsp->can_read) {
528 * Ditto for read locks on write only files.
530 DEBUG(10,("map_posix_lock_type: Changing read lock to write due to write-only file.\n"));
531 return F_WRLCK;
535 * This return should be the most normal, as we attempt
536 * to always open files read/write.
539 return (lock_type == READ_LOCK) ? F_RDLCK : F_WRLCK;
542 /****************************************************************************
543 Check to see if the given unsigned lock range is within the possible POSIX
544 range. Modifies the given args to be in range if possible, just returns
545 False if not.
546 ****************************************************************************/
548 static BOOL posix_lock_in_range(SMB_OFF_T *offset_out, SMB_OFF_T *count_out,
549 SMB_BIG_UINT u_offset, SMB_BIG_UINT u_count)
551 SMB_OFF_T offset = (SMB_OFF_T)u_offset;
552 SMB_OFF_T count = (SMB_OFF_T)u_count;
555 * For the type of system we are, attempt to
556 * find the maximum positive lock offset as an SMB_OFF_T.
559 #if defined(LARGE_SMB_OFF_T) && !defined(HAVE_BROKEN_FCNTL64_LOCKS)
562 * In this case SMB_OFF_T is 64 bits,
563 * and the underlying system can handle 64 bit signed locks.
566 SMB_OFF_T mask2 = ((SMB_OFF_T)0x4) << (SMB_OFF_T_BITS-4);
567 SMB_OFF_T mask = (mask2<<1);
568 SMB_OFF_T max_positive_lock_offset = ~mask;
570 #else /* !LARGE_SMB_OFF_T || HAVE_BROKEN_FCNTL64_LOCKS */
573 * In this case either SMB_OFF_T is 32 bits,
574 * or the underlying system cannot handle 64 bit signed locks.
575 * All offsets & counts must be 2^31 or less.
578 SMB_OFF_T max_positive_lock_offset = 0x7FFFFFFF;
580 #endif /* !LARGE_SMB_OFF_T || HAVE_BROKEN_FCNTL64_LOCKS */
583 * If the given offset was > max_positive_lock_offset then we cannot map this at all
584 * ignore this lock.
587 if (u_offset & ~((SMB_BIG_UINT)max_positive_lock_offset)) {
588 DEBUG(10,("posix_lock_in_range: (offset = %.0f) offset > %.0f and we cannot handle this. Ignoring lock.\n",
589 (double)u_offset, (double)((SMB_BIG_UINT)max_positive_lock_offset) ));
590 return False;
594 * We must truncate the offset and count to less than max_positive_lock_offset.
597 offset &= max_positive_lock_offset;
598 count &= max_positive_lock_offset;
602 * Deal with a very common case of count of all ones.
603 * (lock entire file).
606 if(count == (SMB_OFF_T)-1)
607 count = max_positive_lock_offset;
610 * Truncate count to end at max lock offset.
613 if (offset + count < 0 || offset + count > max_positive_lock_offset)
614 count = max_positive_lock_offset - offset;
617 * If we ate all the count, ignore this lock.
620 if (count == 0) {
621 DEBUG(10,("posix_lock_in_range: Count = 0. Ignoring lock u_offset = %.0f, u_count = %.0f\n",
622 (double)u_offset, (double)u_count ));
623 return False;
627 * The mapping was successful.
630 DEBUG(10,("posix_lock_in_range: offset_out = %.0f, count_out = %.0f\n",
631 (double)offset, (double)count ));
633 *offset_out = offset;
634 *count_out = count;
636 return True;
639 /****************************************************************************
640 Pathetically try and map a 64 bit lock offset into 31 bits. I hate Windows :-).
641 ****************************************************************************/
643 uint32 map_lock_offset(uint32 high, uint32 low)
645 unsigned int i;
646 uint32 mask = 0;
647 uint32 highcopy = high;
650 * Try and find out how many significant bits there are in high.
653 for(i = 0; highcopy; i++)
654 highcopy >>= 1;
657 * We use 31 bits not 32 here as POSIX
658 * lock offsets may not be negative.
661 mask = (~0) << (31 - i);
663 if(low & mask)
664 return 0; /* Fail. */
666 high <<= (31 - i);
668 return (high|low);
671 /****************************************************************************
672 Actual function that does POSIX locks. Copes with 64 -> 32 bit cruft and
673 broken NFS implementations.
674 ****************************************************************************/
676 static BOOL posix_fcntl_lock(files_struct *fsp, int op, SMB_OFF_T offset, SMB_OFF_T count, int type)
678 int ret;
679 struct connection_struct *conn = fsp->conn;
681 #if defined(LARGE_SMB_OFF_T)
683 * In the 64 bit locking case we store the original
684 * values in case we have to map to a 32 bit lock on
685 * a filesystem that doesn't support 64 bit locks.
687 SMB_OFF_T orig_offset = offset;
688 SMB_OFF_T orig_count = count;
689 #endif /* LARGE_SMB_OFF_T */
691 DEBUG(8,("posix_fcntl_lock %d %d %.0f %.0f %d\n",fsp->fd,op,(double)offset,(double)count,type));
693 ret = conn->vfs_ops.lock(fsp,fsp->fd,op,offset,count,type);
695 if (!ret && (errno == EFBIG)) {
696 if( DEBUGLVL( 0 )) {
697 dbgtext("posix_fcntl_lock: WARNING: lock request at offset %.0f, length %.0f returned\n", (double)offset,(double)count);
698 dbgtext("a 'file too large' error. This can happen when using 64 bit lock offsets\n");
699 dbgtext("on 32 bit NFS mounted file systems. Retrying with 32 bit truncated length.\n");
701 /* 32 bit NFS file system, retry with smaller offset */
702 errno = 0;
703 count &= 0x7fffffff;
704 ret = conn->vfs_ops.lock(fsp,fsp->fd,op,offset,count,type);
707 /* A lock query - just return. */
708 if (op == SMB_F_GETLK)
709 return ret;
711 /* A lock set or unset. */
712 if (!ret) {
713 DEBUG(3,("posix_fcntl_lock: lock failed at offset %.0f count %.0f op %d type %d (%s)\n",
714 (double)offset,(double)count,op,type,strerror(errno)));
716 /* Perhaps it doesn't support this sort of locking ? */
717 if (errno == EINVAL) {
718 #if defined(LARGE_SMB_OFF_T)
721 * Ok - if we get here then we have a 64 bit lock request
722 * that has returned EINVAL. Try and map to 31 bits for offset
723 * and length and try again. This may happen if a filesystem
724 * doesn't support 64 bit offsets (efs/ufs) although the underlying
725 * OS does.
727 uint32 off_low = (orig_offset & 0xFFFFFFFF);
728 uint32 off_high = ((orig_offset >> 32) & 0xFFFFFFFF);
730 count = (orig_count & 0x7FFFFFFF);
731 offset = (SMB_OFF_T)map_lock_offset(off_high, off_low);
732 ret = conn->vfs_ops.lock(fsp,fsp->fd,op,offset,count,type);
733 if (!ret) {
734 if (errno == EINVAL) {
735 DEBUG(3,("posix_fcntl_lock: locking not supported? returning True\n"));
736 return(True);
738 return False;
740 DEBUG(3,("posix_fcntl_lock: 64 -> 32 bit modified lock call successful\n"));
741 return True;
743 #else /* LARGE_SMB_OFF_T */
744 DEBUG(3,("locking not supported? returning True\n"));
745 return(True);
746 #endif /* LARGE_SMB_OFF_T */
749 return(False);
752 DEBUG(8,("posix_fcntl_lock: Lock call successful\n"));
754 return(True);
757 /****************************************************************************
758 POSIX function to see if a file region is locked. Returns True if the
759 region is locked, False otherwise.
760 ****************************************************************************/
762 BOOL is_posix_locked(files_struct *fsp, SMB_BIG_UINT u_offset, SMB_BIG_UINT u_count, enum brl_type lock_type)
764 SMB_OFF_T offset;
765 SMB_OFF_T count;
766 int posix_lock_type = map_posix_lock_type(fsp,lock_type);
768 DEBUG(10,("is_posix_locked: File %s, offset = %.0f, count = %.0f, type = %s\n",
769 fsp->fsp_name, (double)u_offset, (double)u_count, posix_lock_type_name(lock_type) ));
772 * If the requested lock won't fit in the POSIX range, we will
773 * never set it, so presume it is not locked.
776 if(!posix_lock_in_range(&offset, &count, u_offset, u_count))
777 return False;
780 * Note that most UNIX's can *test* for a write lock on
781 * a read-only fd, just not *set* a write lock on a read-only
782 * fd. So we don't need to use map_lock_type here.
785 return posix_fcntl_lock(fsp,SMB_F_GETLK,offset,count,posix_lock_type);
789 * Structure used when splitting a lock range
790 * into a POSIX lock range. Doubly linked list.
793 struct lock_list {
794 struct lock_list *next;
795 struct lock_list *prev;
796 SMB_OFF_T start;
797 SMB_OFF_T size;
800 /****************************************************************************
801 Create a list of lock ranges that don't overlap a given range. Used in calculating
802 POSIX locks and unlocks. This is a difficult function that requires ASCII art to
803 understand it :-).
804 ****************************************************************************/
806 static struct lock_list *posix_lock_list(TALLOC_CTX *ctx, struct lock_list *lhead, files_struct *fsp)
808 TDB_DATA kbuf = locking_key_fsp(fsp);
809 TDB_DATA dbuf;
810 struct posix_lock *locks;
811 size_t num_locks, i;
813 dbuf.dptr = NULL;
815 dbuf = tdb_fetch(posix_lock_tdb, kbuf);
817 if (!dbuf.dptr)
818 return lhead;
820 locks = (struct posix_lock *)dbuf.dptr;
821 num_locks = (size_t)(dbuf.dsize / sizeof(*locks));
824 * Check the current lock list on this dev/inode pair.
825 * Quit if the list is deleted.
828 DEBUG(10,("posix_lock_list: curr: start=%.0f,size=%.0f\n",
829 (double)lhead->start, (double)lhead->size ));
831 for (i=0; i<num_locks && lhead; i++) {
833 struct posix_lock *lock = &locks[i];
834 struct lock_list *l_curr;
837 * Walk the lock list, checking for overlaps. Note that
838 * the lock list can expand within this loop if the current
839 * range being examined needs to be split.
842 for (l_curr = lhead; l_curr;) {
844 DEBUG(10,("posix_lock_list: lock: fd=%d: start=%.0f,size=%.0f:type=%s", lock->fd,
845 (double)lock->start, (double)lock->size, posix_lock_type_name(lock->lock_type) ));
847 if ( (l_curr->start >= (lock->start + lock->size)) ||
848 (lock->start >= (l_curr->start + l_curr->size))) {
850 /* No overlap with this lock - leave this range alone. */
851 /*********************************************
852 +---------+
853 | l_curr |
854 +---------+
855 +-------+
856 | lock |
857 +-------+
858 OR....
859 +---------+
860 | l_curr |
861 +---------+
862 **********************************************/
864 DEBUG(10,("no overlap case.\n" ));
866 l_curr = l_curr->next;
868 } else if ( (l_curr->start >= lock->start) &&
869 (l_curr->start + l_curr->size <= lock->start + lock->size) ) {
872 * This unlock is completely overlapped by this existing lock range
873 * and thus should have no effect (not be unlocked). Delete it from the list.
875 /*********************************************
876 +---------+
877 | l_curr |
878 +---------+
879 +---------------------------+
880 | lock |
881 +---------------------------+
882 **********************************************/
883 /* Save the next pointer */
884 struct lock_list *ul_next = l_curr->next;
886 DEBUG(10,("delete case.\n" ));
888 DLIST_REMOVE(lhead, l_curr);
889 if(lhead == NULL)
890 break; /* No more list... */
892 l_curr = ul_next;
894 } else if ( (l_curr->start >= lock->start) &&
895 (l_curr->start < lock->start + lock->size) &&
896 (l_curr->start + l_curr->size > lock->start + lock->size) ) {
899 * This unlock overlaps the existing lock range at the high end.
900 * Truncate by moving start to existing range end and reducing size.
902 /*********************************************
903 +---------------+
904 | l_curr |
905 +---------------+
906 +---------------+
907 | lock |
908 +---------------+
909 BECOMES....
910 +-------+
911 | l_curr|
912 +-------+
913 **********************************************/
915 l_curr->size = (l_curr->start + l_curr->size) - (lock->start + lock->size);
916 l_curr->start = lock->start + lock->size;
918 DEBUG(10,("truncate high case: start=%.0f,size=%.0f\n",
919 (double)l_curr->start, (double)l_curr->size ));
921 l_curr = l_curr->next;
923 } else if ( (l_curr->start < lock->start) &&
924 (l_curr->start + l_curr->size > lock->start) &&
925 (l_curr->start + l_curr->size <= lock->start + lock->size) ) {
928 * This unlock overlaps the existing lock range at the low end.
929 * Truncate by reducing size.
931 /*********************************************
932 +---------------+
933 | l_curr |
934 +---------------+
935 +---------------+
936 | lock |
937 +---------------+
938 BECOMES....
939 +-------+
940 | l_curr|
941 +-------+
942 **********************************************/
944 l_curr->size = lock->start - l_curr->start;
946 DEBUG(10,("truncate low case: start=%.0f,size=%.0f\n",
947 (double)l_curr->start, (double)l_curr->size ));
949 l_curr = l_curr->next;
951 } else if ( (l_curr->start < lock->start) &&
952 (l_curr->start + l_curr->size > lock->start + lock->size) ) {
954 * Worst case scenario. Unlock request completely overlaps an existing
955 * lock range. Split the request into two, push the new (upper) request
956 * into the dlink list, and continue with the entry after ul_new (as we
957 * know that ul_new will not overlap with this lock).
959 /*********************************************
960 +---------------------------+
961 | l_curr |
962 +---------------------------+
963 +---------+
964 | lock |
965 +---------+
966 BECOMES.....
967 +-------+ +---------+
968 | l_curr| | l_new |
969 +-------+ +---------+
970 **********************************************/
971 struct lock_list *l_new = (struct lock_list *)talloc(ctx,
972 sizeof(struct lock_list));
974 if(l_new == NULL) {
975 DEBUG(0,("posix_lock_list: talloc fail.\n"));
976 return NULL; /* The talloc_destroy takes care of cleanup. */
979 ZERO_STRUCTP(l_new);
980 l_new->start = lock->start + lock->size;
981 l_new->size = l_curr->start + l_curr->size - l_new->start;
983 /* Truncate the l_curr. */
984 l_curr->size = lock->start - l_curr->start;
986 DEBUG(10,("split case: curr: start=%.0f,size=%.0f \
987 new: start=%.0f,size=%.0f\n", (double)l_curr->start, (double)l_curr->size,
988 (double)l_new->start, (double)l_new->size ));
991 * Add into the dlink list after the l_curr point - NOT at lhead.
992 * Note we can't use DLINK_ADD here as this inserts at the head of the given list.
995 l_new->prev = l_curr;
996 l_new->next = l_curr->next;
997 l_curr->next = l_new;
999 /* And move after the link we added. */
1000 l_curr = l_new->next;
1002 } else {
1005 * This logic case should never happen. Ensure this is the
1006 * case by forcing an abort.... Remove in production.
1008 pstring msg;
1010 slprintf(msg, sizeof(msg)-1, "logic flaw in cases: l_curr: start = %.0f, size = %.0f : \
1011 lock: start = %.0f, size = %.0f\n", (double)l_curr->start, (double)l_curr->size, (double)lock->start, (double)lock->size );
1013 smb_panic(msg);
1015 } /* end for ( l_curr = lhead; l_curr;) */
1016 } /* end for (i=0; i<num_locks && ul_head; i++) */
1018 if (dbuf.dptr)
1019 free(dbuf.dptr);
1021 return lhead;
1024 /****************************************************************************
1025 POSIX function to acquire a lock. Returns True if the
1026 lock could be granted, False if not.
1027 ****************************************************************************/
1029 BOOL set_posix_lock(files_struct *fsp, SMB_BIG_UINT u_offset, SMB_BIG_UINT u_count, enum brl_type lock_type)
1031 SMB_OFF_T offset;
1032 SMB_OFF_T count;
1033 BOOL ret = True;
1034 size_t entry_num = 0;
1035 size_t lock_count;
1036 TALLOC_CTX *l_ctx = NULL;
1037 struct lock_list *llist = NULL;
1038 struct lock_list *ll = NULL;
1039 int posix_lock_type = map_posix_lock_type(fsp,lock_type);
1041 DEBUG(5,("set_posix_lock: File %s, offset = %.0f, count = %.0f, type = %s\n",
1042 fsp->fsp_name, (double)u_offset, (double)u_count, posix_lock_type_name(lock_type) ));
1045 * If the requested lock won't fit in the POSIX range, we will
1046 * pretend it was successful.
1049 if(!posix_lock_in_range(&offset, &count, u_offset, u_count))
1050 return True;
1053 * Windows is very strange. It allows read locks to be overlayed
1054 * (even over a write lock), but leaves the write lock in force until the first
1055 * unlock. It also reference counts the locks. This means the following sequence :
1057 * process1 process2
1058 * ------------------------------------------------------------------------
1059 * WRITE LOCK : start = 2, len = 10
1060 * READ LOCK: start =0, len = 10 - FAIL
1061 * READ LOCK : start = 0, len = 14
1062 * READ LOCK: start =0, len = 10 - FAIL
1063 * UNLOCK : start = 2, len = 10
1064 * READ LOCK: start =0, len = 10 - OK
1066 * Under POSIX, the same sequence in steps 1 and 2 would not be reference counted, but
1067 * would leave a single read lock over the 0-14 region. In order to
1068 * re-create Windows semantics mapped to POSIX locks, we create multiple TDB
1069 * entries, one for each overlayed lock request. We are guarenteed by the brlock
1070 * semantics that if a write lock is added, then it will be first in the array.
1073 if ((l_ctx = talloc_init()) == NULL) {
1074 DEBUG(0,("set_posix_lock: unable to init talloc context.\n"));
1075 return True; /* Not a fatal error. */
1078 if ((ll = (struct lock_list *)talloc(l_ctx, sizeof(struct lock_list))) == NULL) {
1079 DEBUG(0,("set_posix_lock: unable to talloc unlock list.\n"));
1080 talloc_destroy(l_ctx);
1081 return True; /* Not a fatal error. */
1085 * Create the initial list entry containing the
1086 * lock we want to add.
1089 ZERO_STRUCTP(ll);
1090 ll->start = offset;
1091 ll->size = count;
1093 DLIST_ADD(llist, ll);
1096 * The following call calculates if there are any
1097 * overlapping locks held by this process on
1098 * fd's open on the same file and splits this list
1099 * into a list of lock ranges that do not overlap with existing
1100 * POSIX locks.
1103 llist = posix_lock_list(l_ctx, llist, fsp);
1106 * Now we have the list of ranges to lock it is safe to add the
1107 * entry into the POSIX lock tdb. We take note of the entry we
1108 * added here in case we have to remove it on POSIX lock fail.
1111 if (!add_posix_lock_entry(fsp,offset,count,posix_lock_type,&entry_num)) {
1112 DEBUG(0,("set_posix_lock: Unable to create posix lock entry !\n"));
1113 talloc_destroy(l_ctx);
1114 return False;
1118 * Add the POSIX locks on the list of ranges returned.
1119 * As the lock is supposed to be added atomically, we need to
1120 * back out all the locks if any one of these calls fail.
1123 for (lock_count = 0, ll = llist; ll; ll = ll->next, lock_count++) {
1124 offset = ll->start;
1125 count = ll->size;
1127 DEBUG(5,("set_posix_lock: Real lock: Type = %s: offset = %.0f, count = %.0f\n",
1128 posix_lock_type_name(posix_lock_type), (double)offset, (double)count ));
1130 if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,posix_lock_type)) {
1131 DEBUG(5,("set_posix_lock: Lock fail !: Type = %s: offset = %.0f, count = %.0f. Errno = %s\n",
1132 posix_lock_type_name(posix_lock_type), (double)offset, (double)count, strerror(errno) ));
1133 ret = False;
1134 break;
1138 if (!ret) {
1141 * Back out all the POSIX locks we have on fail.
1144 for (ll = llist; lock_count; ll = ll->next, lock_count--) {
1145 offset = ll->start;
1146 count = ll->size;
1148 DEBUG(5,("set_posix_lock: Backing out locks: Type = %s: offset = %.0f, count = %.0f\n",
1149 posix_lock_type_name(posix_lock_type), (double)offset, (double)count ));
1151 posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_UNLCK);
1155 * Remove the tdb entry for this lock.
1158 delete_posix_lock_entry_by_index(fsp,entry_num);
1161 talloc_destroy(l_ctx);
1162 return ret;
1165 /****************************************************************************
1166 POSIX function to release a lock. Returns True if the
1167 lock could be released, False if not.
1168 ****************************************************************************/
1170 BOOL release_posix_lock(files_struct *fsp, SMB_BIG_UINT u_offset, SMB_BIG_UINT u_count)
1172 SMB_OFF_T offset;
1173 SMB_OFF_T count;
1174 BOOL ret = True;
1175 TALLOC_CTX *ul_ctx = NULL;
1176 struct lock_list *ulist = NULL;
1177 struct lock_list *ul = NULL;
1178 struct posix_lock deleted_lock;
1179 int num_overlapped_entries;
1181 DEBUG(5,("release_posix_lock: File %s, offset = %.0f, count = %.0f\n",
1182 fsp->fsp_name, (double)u_offset, (double)u_count ));
1185 * If the requested lock won't fit in the POSIX range, we will
1186 * pretend it was successful.
1189 if(!posix_lock_in_range(&offset, &count, u_offset, u_count))
1190 return True;
1193 * We treat this as one unlock request for POSIX accounting purposes even
1194 * if it may later be split into multiple smaller POSIX unlock ranges.
1195 * num_overlapped_entries is the number of existing locks that have any
1196 * overlap with this unlock request.
1199 num_overlapped_entries = delete_posix_lock_entry(fsp, offset, count, &deleted_lock);
1201 if (num_overlapped_entries == -1) {
1202 smb_panic("release_posix_lock: unable find entry to delete !\n");
1206 * If num_overlapped_entries is > 0, and the lock_type we just deleted from the tdb was
1207 * a POSIX write lock, then before doing the unlock we need to downgrade
1208 * the POSIX lock to a read lock. This allows any overlapping read locks
1209 * to be atomically maintained.
1212 if (num_overlapped_entries > 0 && deleted_lock.lock_type == F_WRLCK) {
1213 if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_RDLCK)) {
1214 DEBUG(0,("release_posix_lock: downgrade of lock failed with error %s !\n", strerror(errno) ));
1215 return False;
1219 if ((ul_ctx = talloc_init()) == NULL) {
1220 DEBUG(0,("release_posix_lock: unable to init talloc context.\n"));
1221 return True; /* Not a fatal error. */
1224 if ((ul = (struct lock_list *)talloc(ul_ctx, sizeof(struct lock_list))) == NULL) {
1225 DEBUG(0,("release_posix_lock: unable to talloc unlock list.\n"));
1226 talloc_destroy(ul_ctx);
1227 return True; /* Not a fatal error. */
1231 * Create the initial list entry containing the
1232 * lock we want to remove.
1235 ZERO_STRUCTP(ul);
1236 ul->start = offset;
1237 ul->size = count;
1239 DLIST_ADD(ulist, ul);
1242 * The following call calculates if there are any
1243 * overlapping locks held by this process on
1244 * fd's open on the same file and creates a
1245 * list of unlock ranges that will allow
1246 * POSIX lock ranges to remain on the file whilst the
1247 * unlocks are performed.
1250 ulist = posix_lock_list(ul_ctx, ulist, fsp);
1253 * Release the POSIX locks on the list of ranges returned.
1256 for(; ulist; ulist = ulist->next) {
1257 offset = ulist->start;
1258 count = ulist->size;
1260 DEBUG(5,("release_posix_lock: Real unlock: offset = %.0f, count = %.0f\n",
1261 (double)offset, (double)count ));
1263 if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_UNLCK))
1264 ret = False;
1267 talloc_destroy(ul_ctx);
1269 return ret;
1272 /****************************************************************************
1273 Remove all lock entries for a specific dev/inode pair from the tdb.
1274 ****************************************************************************/
1276 static void delete_posix_lock_entries(files_struct *fsp)
1278 TDB_DATA kbuf = locking_key_fsp(fsp);
1280 if (tdb_delete(posix_lock_tdb, kbuf) == -1)
1281 DEBUG(0,("delete_close_entries: tdb_delete fail !\n"));
1284 /****************************************************************************
1285 Debug function.
1286 ****************************************************************************/
1288 static void dump_entry(struct posix_lock *pl)
1290 DEBUG(10,("entry: start=%.0f, size=%.0f, type=%d, fd=%i\n",
1291 (double)pl->start, (double)pl->size, (int)pl->lock_type, pl->fd ));
1294 /****************************************************************************
1295 Remove any locks on this fd. Called from file_close().
1296 ****************************************************************************/
1298 void posix_locking_close_file(files_struct *fsp)
1300 struct posix_lock *entries = NULL;
1301 size_t count, i;
1304 * Optimization for the common case where we are the only
1305 * opener of a file. If all fd entries are our own, we don't
1306 * need to explicitly release all the locks via the POSIX functions,
1307 * we can just remove all the entries in the tdb and allow the
1308 * close to remove the real locks.
1311 count = get_posix_lock_entries(fsp, &entries);
1313 if (count == 0) {
1314 DEBUG(10,("posix_locking_close_file: file %s has no outstanding locks.\n", fsp->fsp_name ));
1315 return;
1318 for (i = 0; i < count; i++) {
1319 if (entries[i].fd != fsp->fd )
1320 break;
1322 dump_entry(&entries[i]);
1325 if (i == count) {
1326 /* All locks are ours. */
1327 DEBUG(10,("posix_locking_close_file: file %s has %u outstanding locks, but all on one fd.\n",
1328 fsp->fsp_name, (unsigned int)count ));
1329 free((char *)entries);
1330 delete_posix_lock_entries(fsp);
1331 return;
1335 * Difficult case. We need to delete all our locks, whilst leaving
1336 * all other POSIX locks in place.
1339 for (i = 0; i < count; i++) {
1340 struct posix_lock *pl = &entries[i];
1341 if (pl->fd == fsp->fd)
1342 release_posix_lock(fsp, (SMB_BIG_UINT)pl->start, (SMB_BIG_UINT)pl->size );
1344 free((char *)entries);
1347 /*******************************************************************
1348 Create the in-memory POSIX lock databases.
1349 ********************************************************************/
1351 BOOL posix_locking_init(int read_only)
1353 if (posix_lock_tdb && posix_pending_close_tdb)
1354 return True;
1356 if (!posix_lock_tdb)
1357 posix_lock_tdb = tdb_open_log(NULL, 0, TDB_INTERNAL,
1358 read_only?O_RDONLY:(O_RDWR|O_CREAT), 0644);
1359 if (!posix_lock_tdb) {
1360 DEBUG(0,("Failed to open POSIX byte range locking database.\n"));
1361 return False;
1363 if (!posix_pending_close_tdb)
1364 posix_pending_close_tdb = tdb_open_log(NULL, 0, TDB_INTERNAL,
1365 read_only?O_RDONLY:(O_RDWR|O_CREAT), 0644);
1366 if (!posix_pending_close_tdb) {
1367 DEBUG(0,("Failed to open POSIX pending close database.\n"));
1368 return False;
1371 return True;
1374 /*******************************************************************
1375 Delete the in-memory POSIX lock databases.
1376 ********************************************************************/
1378 BOOL posix_locking_end(void)
1380 if (posix_lock_tdb && tdb_close(posix_lock_tdb) != 0)
1381 return False;
1382 if (posix_pending_close_tdb && tdb_close(posix_pending_close_tdb) != 0)
1383 return False;
1384 return True;