source/locking/posix.c

   1 /*
   2    Unix SMB/CIFS implementation.
   3    Locking functions
   4    Copyright (C) Jeremy Allison 1992-2000
   5
   6    This program is free software; you can redistribute it and/or modify
   7    it under the terms of the GNU General Public License as published by
   8    the Free Software Foundation; either version 2 of the License, or
   9    (at your option) any later version.
  10
  11    This program is distributed in the hope that it will be useful,
  12    but WITHOUT ANY WARRANTY; without even the implied warranty of
  13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14    GNU General Public License for more details.
  15
  16    You should have received a copy of the GNU General Public License
  17    along with this program; if not, write to the Free Software
  18    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  19
  20    Revision History:
  21
  22    POSIX locking support. Jeremy Allison (jeremy@valinux.com), Apr. 2000.
  23 */
  24
  25 #include "includes.h"
  26
  27 #undef DBGC_CLASS
  28 #define DBGC_CLASS DBGC_LOCKING
  29
  30 /*
  31  * The POSIX locking database handle.
  32  */
  33
   34 static TDB_CONTEXT *posix_lock_tdb; /* records are fetched/stored under the dev/inode key built by locking_key() and are read as arrays of struct posix_lock */
  35
  36 /*
  37  * The pending close database handle.
  38  */
  39
   40 static TDB_CONTEXT *posix_pending_close_tdb; /* records use the same dev/inode key and are read as arrays of int fds */
  41
  42 /*
  43  * The data in POSIX lock records is an unsorted linear array of these
  44  * records.  It is unnecessary to store the count as tdb provides the
  45  * size of the record.
  46  */
  47
   48 struct posix_lock {
   49         int fd;           /* fd the lock was taken on; filled from, and compared against, fsp->fh->fd */
   50         SMB_OFF_T start;  /* first byte offset of the locked range */
   51         SMB_OFF_T size;   /* length of the locked range in bytes */
   52         int lock_type;    /* value compared against F_RDLCK by posix_lock_type_name(); presumably F_RDLCK or F_WRLCK - confirm against callers */
   53 };
  54
  55 /*
  56  * The data in POSIX pending close records is an unsorted linear array of int
  57  * records.  It is unnecessary to store the count as tdb provides the
  58  * size of the record.
  59  */
  60
  61 /* The key used in both the POSIX databases. */
  62
   63 struct posix_lock_key {
   64         SMB_DEV_T device; /* filled from fsp->dev by locking_key_fsp() */
   65         SMB_INO_T inode;  /* filled from fsp->inode by locking_key_fsp() */
   66 };
  67
  68 /*******************************************************************
  69  Form a static locking key for a dev/inode pair.
  70 ******************************************************************/
  71
  72 static TDB_DATA locking_key(SMB_DEV_T dev, SMB_INO_T inode)
  73 {
  74         static struct posix_lock_key key;
  75         TDB_DATA kbuf;
  76
  77         memset(&key, '\0', sizeof(key));
  78         key.device = dev;
  79         key.inode = inode;
  80         kbuf.dptr = (char *)&key;
  81         kbuf.dsize = sizeof(key);
  82         return kbuf;
  83 }
  84
  85 /*******************************************************************
  86  Convenience function to get a key from an fsp.
  87 ******************************************************************/
  88
  89 static TDB_DATA locking_key_fsp(files_struct *fsp)
  90 {
  91         return locking_key(fsp->dev, fsp->inode);
  92 }
  93
  94 /****************************************************************************
  95  Add an fd to the pending close tdb.
  96 ****************************************************************************/
  97
  98 static BOOL add_fd_to_close_entry(files_struct *fsp)
  99 {
 100         TDB_DATA kbuf = locking_key_fsp(fsp);
 101         TDB_DATA dbuf;
 102         char *tp;
 103
 104         dbuf.dptr = NULL;
 105         dbuf.dsize = 0;
 106
 107         dbuf = tdb_fetch(posix_pending_close_tdb, kbuf);
 108
 109         tp = SMB_REALLOC(dbuf.dptr, dbuf.dsize + sizeof(int));
 110         if (!tp) {
 111                 DEBUG(0,("add_fd_to_close_entry: Realloc fail !\n"));
 112                 SAFE_FREE(dbuf.dptr);
 113                 return False;
 114         } else
 115                 dbuf.dptr = tp;
 116
 117         memcpy(dbuf.dptr + dbuf.dsize, &fsp->fh->fd, sizeof(int));
 118         dbuf.dsize += sizeof(int);
 119
 120         if (tdb_store(posix_pending_close_tdb, kbuf, dbuf, TDB_REPLACE) == -1) {
 121                 DEBUG(0,("add_fd_to_close_entry: tdb_store fail !\n"));
 122         }
 123
 124         SAFE_FREE(dbuf.dptr);
 125         return True;
 126 }
 127
 128 /****************************************************************************
 129  Remove all fd entries for a specific dev/inode pair from the tdb.
 130 ****************************************************************************/
 131
 132 static void delete_close_entries(files_struct *fsp)
 133 {
 134         TDB_DATA kbuf = locking_key_fsp(fsp);
 135
 136         if (tdb_delete(posix_pending_close_tdb, kbuf) == -1)
 137                 DEBUG(0,("delete_close_entries: tdb_delete fail !\n"));
 138 }
 139
 140 /****************************************************************************
 141  Get the array of POSIX pending close records for an open fsp. Caller must
 142  free. Returns number of entries.
 143 ****************************************************************************/
 144
 145 static size_t get_posix_pending_close_entries(files_struct *fsp, int **entries)
 146 {
 147         TDB_DATA kbuf = locking_key_fsp(fsp);
 148         TDB_DATA dbuf;
 149         size_t count = 0;
 150
 151         *entries = NULL;
 152         dbuf.dptr = NULL;
 153
 154         dbuf = tdb_fetch(posix_pending_close_tdb, kbuf);
 155
 156         if (!dbuf.dptr) {
 157                 return 0;
 158         }
 159
 160         *entries = (int *)dbuf.dptr;
 161         count = (size_t)(dbuf.dsize / sizeof(int));
 162
 163         return count;
 164 }
 165
 166 /****************************************************************************
 167  Get the array of POSIX locks for an fsp. Caller must free. Returns
 168  number of entries.
 169 ****************************************************************************/
 170
 171 static size_t get_posix_lock_entries(files_struct *fsp, struct posix_lock **entries)
 172 {
 173         TDB_DATA kbuf = locking_key_fsp(fsp);
 174         TDB_DATA dbuf;
 175         size_t count = 0;
 176
 177         *entries = NULL;
 178
 179         dbuf.dptr = NULL;
 180
 181         dbuf = tdb_fetch(posix_lock_tdb, kbuf);
 182
 183         if (!dbuf.dptr) {
 184                 return 0;
 185         }
 186
 187         *entries = (struct posix_lock *)dbuf.dptr;
 188         count = (size_t)(dbuf.dsize / sizeof(struct posix_lock));
 189
 190         return count;
 191 }
 192
 193 /****************************************************************************
 194  Deal with pending closes needed by POSIX locking support.
 195  Note that posix_locking_close_file() is expected to have been called
 196  to delete all locks on this fsp before this function is called.
 197 ****************************************************************************/
 198
 199 int fd_close_posix(struct connection_struct *conn, files_struct *fsp)
 200 {
 201         int saved_errno = 0;
 202         int ret;
 203         size_t count, i;
 204         struct posix_lock *entries = NULL;
 205         int *fd_array = NULL;
 206         BOOL locks_on_other_fds = False;
 207
 208         if (!lp_posix_locking(SNUM(conn))) {
 209                 /*
 210                  * No POSIX to worry about, just close.
 211                  */
 212                 ret = SMB_VFS_CLOSE(fsp,fsp->fh->fd);
 213                 fsp->fh->fd = -1;
 214                 return ret;
 215         }
 216
 217         /*
 218          * Get the number of outstanding POSIX locks on this dev/inode pair.
 219          */
 220
 221         count = get_posix_lock_entries(fsp, &entries);
 222
 223         /*
 224          * Check if there are any outstanding locks belonging to
 225          * other fd's. This should never be the case if posix_locking_close_file()
 226          * has been called first, but it never hurts to be *sure*.
 227          */
 228
 229         for (i = 0; i < count; i++) {
 230                 if (entries[i].fd != fsp->fh->fd) {
 231                         locks_on_other_fds = True;
 232                         break;
 233                 }
 234         }
 235
 236         if (locks_on_other_fds) {
 237
 238                 /*
 239                  * There are outstanding locks on this dev/inode pair on other fds.
 240                  * Add our fd to the pending close tdb and set fsp->fh->fd to -1.
 241                  */
 242
 243                 if (!add_fd_to_close_entry(fsp)) {
 244                         SAFE_FREE(entries);
 245                         return -1;
 246                 }
 247
 248                 SAFE_FREE(entries);
 249                 fsp->fh->fd = -1;
 250                 return 0;
 251         }
 252
 253         SAFE_FREE(entries);
 254
 255         /*
 256          * No outstanding POSIX locks. Get the pending close fd's
 257          * from the tdb and close them all.
 258          */
 259
 260         count = get_posix_pending_close_entries(fsp, &fd_array);
 261
 262         if (count) {
 263                 DEBUG(10,("fd_close_posix: doing close on %u fd's.\n", (unsigned int)count ));
 264
 265                 for(i = 0; i < count; i++) {
 266                         if (SMB_VFS_CLOSE(fsp,fd_array[i]) == -1) {
 267                                 saved_errno = errno;
 268                         }
 269                 }
 270
 271                 /*
 272                  * Delete all fd's stored in the tdb
 273                  * for this dev/inode pair.
 274                  */
 275
 276                 delete_close_entries(fsp);
 277         }
 278
 279         SAFE_FREE(fd_array);
 280
 281         /*
 282          * Finally close the fd associated with this fsp.
 283          */
 284
 285         ret = SMB_VFS_CLOSE(fsp,fsp->fh->fd);
 286
 287         if (saved_errno != 0) {
 288                 errno = saved_errno;
 289                 ret = -1;
 290         }
 291
 292         fsp->fh->fd = -1;
 293
 294         return ret;
 295 }
 296
 297 /****************************************************************************
 298  Debugging aid :-).
 299 ****************************************************************************/
 300
 301 static const char *posix_lock_type_name(int lock_type)
 302 {
 303         return (lock_type == F_RDLCK) ? "READ" : "WRITE";
 304 }
 305
 306 /****************************************************************************
 307  Delete a POSIX lock entry by index number. Used if the tdb add succeeds, but
 308  then the POSIX fcntl lock fails.
 309 ****************************************************************************/
 310
 311 static BOOL delete_posix_lock_entry_by_index(files_struct *fsp, size_t entry)
 312 {
 313         TDB_DATA kbuf = locking_key_fsp(fsp);
 314         TDB_DATA dbuf;
 315         struct posix_lock *locks;
 316         size_t count;
 317
 318         dbuf.dptr = NULL;
 319
 320         dbuf = tdb_fetch(posix_lock_tdb, kbuf);
 321
 322         if (!dbuf.dptr) {
 323                 DEBUG(10,("delete_posix_lock_entry_by_index: tdb_fetch failed !\n"));
 324                 goto fail;
 325         }
 326
 327         count = (size_t)(dbuf.dsize / sizeof(struct posix_lock));
 328         locks = (struct posix_lock *)dbuf.dptr;
 329
 330         if (count == 1) {
 331                 tdb_delete(posix_lock_tdb, kbuf);
 332         } else {
 333                 if (entry < count-1) {
 334                         memmove(&locks[entry], &locks[entry+1], sizeof(struct posix_lock)*((count-1) - entry));
 335                 }
 336                 dbuf.dsize -= sizeof(struct posix_lock);
 337                 tdb_store(posix_lock_tdb, kbuf, dbuf, TDB_REPLACE);
 338         }
 339
 340         SAFE_FREE(dbuf.dptr);
 341
 342         return True;
 343
 344  fail:
 345
 346         SAFE_FREE(dbuf.dptr);
 347         return False;
 348 }
 349
 350 /****************************************************************************
 351  Add an entry into the POSIX locking tdb. We return the index number of the
 352  added lock (used in case we need to delete *exactly* this entry). Returns
 353  False on fail, True on success.
 354 ****************************************************************************/
 355
 356 static BOOL add_posix_lock_entry(files_struct *fsp, SMB_OFF_T start, SMB_OFF_T size, int lock_type, size_t *pentry_num)
 357 {
 358         TDB_DATA kbuf = locking_key_fsp(fsp);
 359         TDB_DATA dbuf;
 360         struct posix_lock pl;
 361         char *tp;
 362
 363         dbuf.dptr = NULL;
 364         dbuf.dsize = 0;
 365
 366         dbuf = tdb_fetch(posix_lock_tdb, kbuf);
 367
 368         *pentry_num = (size_t)(dbuf.dsize / sizeof(struct posix_lock));
 369
 370         /*
 371          * Add new record.
 372          */
 373
 374         pl.fd = fsp->fh->fd;
 375         pl.start = start;
 376         pl.size = size;
 377         pl.lock_type = lock_type;
 378
 379         tp = SMB_REALLOC(dbuf.dptr, dbuf.dsize + sizeof(struct posix_lock));
 380         if (!tp) {
 381                 DEBUG(0,("add_posix_lock_entry: Realloc fail !\n"));
 382                 goto fail;
 383         } else
 384                 dbuf.dptr = tp;
 385
 386         memcpy(dbuf.dptr + dbuf.dsize, &pl, sizeof(struct posix_lock));
 387         dbuf.dsize += sizeof(struct posix_lock);
 388
 389         if (tdb_store(posix_lock_tdb, kbuf, dbuf, TDB_REPLACE) == -1) {
 390                 DEBUG(0,("add_posix_lock: Failed to add lock entry on file %s\n", fsp->fsp_name));
 391                 goto fail;
 392         }
 393
 394         SAFE_FREE(dbuf.dptr);
 395
 396         DEBUG(10,("add_posix_lock: File %s: type = %s: start=%.0f size=%.0f: dev=%.0f inode=%.0f\n",
 397                         fsp->fsp_name, posix_lock_type_name(lock_type), (double)start, (double)size,
 398                         (double)fsp->dev, (double)fsp->inode ));
 399
 400         return True;
 401
 402  fail:
 403
 404         SAFE_FREE(dbuf.dptr);
 405         return False;
 406 }
 407
 408 /****************************************************************************
 409  Calculate if locks have any overlap at all.
 410 ****************************************************************************/
 411
 412 static BOOL does_lock_overlap(SMB_OFF_T start1, SMB_OFF_T size1, SMB_OFF_T start2, SMB_OFF_T size2)
 413 {
 414         if (start1 >= start2 && start1 <= start2 + size2)
 415                 return True;
 416
 417         if (start1 < start2 && start1 + size1 > start2)
 418                 return True;
 419
 420         return False;
 421 }
 422
 423 /****************************************************************************
 424  Delete an entry from the POSIX locking tdb. Returns a copy of the entry being
 425  deleted and the number of records that are overlapped by this one, or -1 on error.
 426 ****************************************************************************/
 427
 428 static int delete_posix_lock_entry(files_struct *fsp, SMB_OFF_T start, SMB_OFF_T size, struct posix_lock *pl)
 429 {
 430         TDB_DATA kbuf = locking_key_fsp(fsp);
 431         TDB_DATA dbuf;
 432         struct posix_lock *locks;
 433         size_t i, count;
 434         BOOL found = False;
 435         int num_overlapping_records = 0;
 436
 437         dbuf.dptr = NULL;
 438
 439         dbuf = tdb_fetch(posix_lock_tdb, kbuf);
 440
 441         if (!dbuf.dptr) {
 442                 DEBUG(10,("delete_posix_lock_entry: tdb_fetch failed !\n"));
 443                 goto fail;
 444         }
 445
 446         /* There are existing locks - find a match. */
 447         locks = (struct posix_lock *)dbuf.dptr;
 448         count = (size_t)(dbuf.dsize / sizeof(struct posix_lock));
 449
 450         /*
 451          * Search for and delete the first record that matches the
 452          * unlock criteria.
 453          */
 454
 455         for (i=0; i<count; i++) {
 456                 struct posix_lock *entry = &locks[i];
 457
 458                 if (entry->fd == fsp->fh->fd &&
 459                         entry->start == start &&
 460                         entry->size == size) {
 461
 462                         /* Make a copy if requested. */
 463                         if (pl)
 464                                 *pl = *entry;
 465
 466                         /* Found it - delete it. */
 467                         if (count == 1) {
 468                                 tdb_delete(posix_lock_tdb, kbuf);
 469                         } else {
 470                                 if (i < count-1) {
 471                                         memmove(&locks[i], &locks[i+1], sizeof(struct posix_lock)*((count-1) - i));
 472                                 }
 473                                 dbuf.dsize -= sizeof(struct posix_lock);
 474                                 tdb_store(posix_lock_tdb, kbuf, dbuf, TDB_REPLACE);
 475                         }
 476                         count--;
 477                         found = True;
 478                         break;
 479                 }
 480         }
 481
 482         if (!found)
 483                 goto fail;
 484
 485         /*
 486          * Count the number of entries that are
 487          * overlapped by this unlock request.
 488          */
 489
 490         for (i = 0; i < count; i++) {
 491                 struct posix_lock *entry = &locks[i];
 492
 493                 if (fsp->fh->fd == entry->fd &&
 494                         does_lock_overlap( start, size, entry->start, entry->size))
 495                                 num_overlapping_records++;
 496         }
 497
 498         DEBUG(10,("delete_posix_lock_entry: type = %s: start=%.0f size=%.0f, num_records = %d\n",
 499                         posix_lock_type_name(pl->lock_type), (double)pl->start, (double)pl->size,
 500                                 (unsigned int)num_overlapping_records ));
 501
 502         SAFE_FREE(dbuf.dptr);
 503
 504         return num_overlapping_records;
 505
 506  fail:
 507
 508         SAFE_FREE(dbuf.dptr);
 509         return -1;
 510 }
 511
 512 /****************************************************************************
 513  Utility function to map a lock type correctly depending on the open
 514  mode of a file.
 515 ****************************************************************************/
 516
 517 static int map_posix_lock_type( files_struct *fsp, enum brl_type lock_type)
 518 {
 519         if((lock_type == WRITE_LOCK) && !fsp->can_write) {
 520                 /*
 521                  * Many UNIX's cannot get a write lock on a file opened read-only.
 522                  * Win32 locking semantics allow this.
 523                  * Do the best we can and attempt a read-only lock.
 524                  */
 525                 DEBUG(10,("map_posix_lock_type: Downgrading write lock to read due to read-only file.\n"));
 526                 return F_RDLCK;
 527         }
 528 #if 0
 529         /* We no longer open files write-only. */
 530          else if((lock_type == READ_LOCK) && !fsp->can_read) {
 531                 /*
 532                  * Ditto for read locks on write only files.
 533                  */
 534                 DEBUG(10,("map_posix_lock_type: Changing read lock to write due to write-only file.\n"));
 535                 return F_WRLCK;
 536         }
 537 #endif
 538
 539         /*
 540          * This return should be the most normal, as we attempt
 541          * to always open files read/write.
 542          */
 543
 544         return (lock_type == READ_LOCK) ? F_RDLCK : F_WRLCK;
 545 }
 546
 547 /****************************************************************************
 548  Check to see if the given unsigned lock range is within the possible POSIX
 549  range. Modifies the given args to be in range if possible, just returns
 550  False if not.
 551 ****************************************************************************/
 552
 553 static BOOL posix_lock_in_range(SMB_OFF_T *offset_out, SMB_OFF_T *count_out,
 554                                 SMB_BIG_UINT u_offset, SMB_BIG_UINT u_count)
 555 {
 556         SMB_OFF_T offset = (SMB_OFF_T)u_offset;
 557         SMB_OFF_T count = (SMB_OFF_T)u_count;
 558
 559         /*
 560          * For the type of system we are, attempt to
 561          * find the maximum positive lock offset as an SMB_OFF_T.
 562          */
 563
 564 #if defined(MAX_POSITIVE_LOCK_OFFSET) /* Some systems have arbitrary limits. */
 565
 566         SMB_OFF_T max_positive_lock_offset = (MAX_POSITIVE_LOCK_OFFSET);
 567
 568 #elif defined(LARGE_SMB_OFF_T) && !defined(HAVE_BROKEN_FCNTL64_LOCKS)
 569
 570         /*
 571          * In this case SMB_OFF_T is 64 bits,
 572          * and the underlying system can handle 64 bit signed locks.
 573          */
 574
 575         SMB_OFF_T mask2 = ((SMB_OFF_T)0x4) << (SMB_OFF_T_BITS-4);
 576         SMB_OFF_T mask = (mask2<<1);
 577         SMB_OFF_T max_positive_lock_offset = ~mask;
 578
 579 #else /* !LARGE_SMB_OFF_T || HAVE_BROKEN_FCNTL64_LOCKS */
 580
 581         /*
 582          * In this case either SMB_OFF_T is 32 bits,
 583          * or the underlying system cannot handle 64 bit signed locks.
 584          * All offsets & counts must be 2^31 or less.
 585          */
 586
 587         SMB_OFF_T max_positive_lock_offset = 0x7FFFFFFF;
 588
 589 #endif /* !LARGE_SMB_OFF_T || HAVE_BROKEN_FCNTL64_LOCKS */
 590
 591         /*
 592          * POSIX locks of length zero mean lock to end-of-file.
 593          * Win32 locks of length zero are point probes. Ignore
 594          * any Win32 locks of length zero. JRA.
 595          */
 596
 597         if (count == (SMB_OFF_T)0) {
 598                 DEBUG(10,("posix_lock_in_range: count = 0, ignoring.\n"));
 599                 return False;
 600         }
 601
 602         /*
 603          * If the given offset was > max_positive_lock_offset then we cannot map this at all
 604          * ignore this lock.
 605          */
 606
 607         if (u_offset & ~((SMB_BIG_UINT)max_positive_lock_offset)) {
 608                 DEBUG(10,("posix_lock_in_range: (offset = %.0f) offset > %.0f and we cannot handle this. Ignoring lock.\n",
 609                                 (double)u_offset, (double)((SMB_BIG_UINT)max_positive_lock_offset) ));
 610                 return False;
 611         }
 612
 613         /*
 614          * We must truncate the count to less than max_positive_lock_offset.
 615          */
 616
 617         if (u_count & ~((SMB_BIG_UINT)max_positive_lock_offset))
 618                 count = max_positive_lock_offset;
 619
 620         /*
 621          * Truncate count to end at max lock offset.
 622          */
 623
 624         if (offset + count < 0 || offset + count > max_positive_lock_offset)
 625                 count = max_positive_lock_offset - offset;
 626
 627         /*
 628          * If we ate all the count, ignore this lock.
 629          */
 630
 631         if (count == 0) {
 632                 DEBUG(10,("posix_lock_in_range: Count = 0. Ignoring lock u_offset = %.0f, u_count = %.0f\n",
 633                                 (double)u_offset, (double)u_count ));
 634                 return False;
 635         }
 636
 637         /*
 638          * The mapping was successful.
 639          */
 640
 641         DEBUG(10,("posix_lock_in_range: offset_out = %.0f, count_out = %.0f\n",
 642                         (double)offset, (double)count ));
 643
 644         *offset_out = offset;
 645         *count_out = count;
 646
 647         return True;
 648 }
 649
 650 /****************************************************************************
 651  Actual function that does POSIX locks. Copes with 64 -> 32 bit cruft and
 652  broken NFS implementations.
 653 ****************************************************************************/
 654
 655 static BOOL posix_fcntl_lock(files_struct *fsp, int op, SMB_OFF_T offset, SMB_OFF_T count, int type)
 656 {
 657         int ret;
 658
 659         DEBUG(8,("posix_fcntl_lock %d %d %.0f %.0f %d\n",fsp->fh->fd,op,(double)offset,(double)count,type));
 660
 661         ret = SMB_VFS_LOCK(fsp,fsp->fh->fd,op,offset,count,type);
 662
 663         if (!ret && ((errno == EFBIG) || (errno == ENOLCK) || (errno ==  EINVAL))) {
 664
 665                 DEBUG(0,("posix_fcntl_lock: WARNING: lock request at offset %.0f, length %.0f returned\n",
 666                                         (double)offset,(double)count));
 667                 DEBUG(0,("an %s error. This can happen when using 64 bit lock offsets\n", strerror(errno)));
 668                 DEBUG(0,("on 32 bit NFS mounted file systems.\n"));
 669
 670                 /*
 671                  * If the offset is > 0x7FFFFFFF then this will cause problems on
 672                  * 32 bit NFS mounted filesystems. Just ignore it.
 673                  */
 674
 675                 if (offset & ~((SMB_OFF_T)0x7fffffff)) {
 676                         DEBUG(0,("Offset greater than 31 bits. Returning success.\n"));
 677                         return True;
 678                 }
 679
 680                 if (count & ~((SMB_OFF_T)0x7fffffff)) {
 681                         /* 32 bit NFS file system, retry with smaller offset */
 682                         DEBUG(0,("Count greater than 31 bits - retrying with 31 bit truncated length.\n"));
 683                         errno = 0;
 684                         count &= 0x7fffffff;
 685                         ret = SMB_VFS_LOCK(fsp,fsp->fh->fd,op,offset,count,type);
 686                 }
 687         }
 688
 689         DEBUG(8,("posix_fcntl_lock: Lock call %s\n", ret ? "successful" : "failed"));
 690
 691         return ret;
 692 }
 693
 694 /****************************************************************************
 695  POSIX function to see if a file region is locked. Returns True if the
 696  region is locked, False otherwise.
 697 ****************************************************************************/
 698
 699 BOOL is_posix_locked(files_struct *fsp, SMB_BIG_UINT u_offset, SMB_BIG_UINT u_count, enum brl_type lock_type)
 700 {
 701         SMB_OFF_T offset;
 702         SMB_OFF_T count;
 703         int posix_lock_type = map_posix_lock_type(fsp,lock_type);
 704
 705         DEBUG(10,("is_posix_locked: File %s, offset = %.0f, count = %.0f, type = %s\n",
 706                         fsp->fsp_name, (double)u_offset, (double)u_count, posix_lock_type_name(lock_type) ));
 707
 708         /*
 709          * If the requested lock won't fit in the POSIX range, we will
 710          * never set it, so presume it is not locked.
 711          */
 712
 713         if(!posix_lock_in_range(&offset, &count, u_offset, u_count))
 714                 return False;
 715
 716         /*
 717          * Note that most UNIX's can *test* for a write lock on
 718          * a read-only fd, just not *set* a write lock on a read-only
 719          * fd. So we don't need to use map_lock_type here.
 720          */
 721
 722         return posix_fcntl_lock(fsp,SMB_F_GETLK,offset,count,posix_lock_type);
 723 }
 724
 725 /*
 726  * Structure used when splitting a lock range
 727  * into a POSIX lock range. Doubly linked list.
 728  */
 729
  730 struct lock_list {
  731         struct lock_list *next;  /* following range; followed when posix_lock_list() walks the list */
  732         struct lock_list *prev;  /* preceding range in the doubly linked list */
  733         SMB_OFF_T start;         /* first byte offset of this range */
  734         SMB_OFF_T size;          /* length of this range in bytes */
  735 };
 736
 737 /****************************************************************************
 738  Create a list of lock ranges that don't overlap a given range. Used in calculating
 739  POSIX locks and unlocks. This is a difficult function that requires ASCII art to
 740  understand it :-).
 741 ****************************************************************************/
 742
 743 static struct lock_list *posix_lock_list(TALLOC_CTX *ctx, struct lock_list *lhead, files_struct *fsp)
 744 {
 745         TDB_DATA kbuf = locking_key_fsp(fsp);
 746         TDB_DATA dbuf;
 747         struct posix_lock *locks;
 748         size_t num_locks, i;
 749
 750         dbuf.dptr = NULL;
 751
 752         dbuf = tdb_fetch(posix_lock_tdb, kbuf);
 753
 754         if (!dbuf.dptr)
 755                 return lhead;
 756
 757         locks = (struct posix_lock *)dbuf.dptr;
 758         num_locks = (size_t)(dbuf.dsize / sizeof(struct posix_lock));
 759
 760         /*
 761          * Check the current lock list on this dev/inode pair.
 762          * Quit if the list is deleted.
 763          */
 764
 765         DEBUG(10,("posix_lock_list: curr: start=%.0f,size=%.0f\n",
 766                 (double)lhead->start, (double)lhead->size ));
 767
 768         for (i=0; i<num_locks && lhead; i++) {
 769
 770                 struct posix_lock *lock = &locks[i];
 771                 struct lock_list *l_curr;
 772
 773                 /*
 774                  * Walk the lock list, checking for overlaps. Note that
 775                  * the lock list can expand within this loop if the current
 776                  * range being examined needs to be split.
 777                  */
 778
 779                 for (l_curr = lhead; l_curr;) {
 780
 781                         DEBUG(10,("posix_lock_list: lock: fd=%d: start=%.0f,size=%.0f:type=%s", lock->fd,
 782                                 (double)lock->start, (double)lock->size, posix_lock_type_name(lock->lock_type) ));
 783
 784                         if ( (l_curr->start >= (lock->start + lock->size)) ||
 785                                  (lock->start >= (l_curr->start + l_curr->size))) {
 786
 787                                 /* No overlap with this lock - leave this range alone. */
 788 /*********************************************
 789                                              +---------+
 790                                              | l_curr  |
 791                                              +---------+
 792                                 +-------+
 793                                 | lock  |
 794                                 +-------+
 795 OR....
 796              +---------+
 797              |  l_curr |
 798              +---------+
 799 **********************************************/
 800
 801                                 DEBUG(10,("no overlap case.\n" ));
 802
 803                                 l_curr = l_curr->next;
 804
 805                         } else if ( (l_curr->start >= lock->start) &&
 806                                                 (l_curr->start + l_curr->size <= lock->start + lock->size) ) {
 807
 808                                 /*
 809                                  * This unlock is completely overlapped by this existing lock range
 810                                  * and thus should have no effect (not be unlocked). Delete it from the list.
 811                                  */
 812 /*********************************************
 813                 +---------+
 814                 |  l_curr |
 815                 +---------+
 816         +---------------------------+
 817         |       lock                |
 818         +---------------------------+
 819 **********************************************/
 820                                 /* Save the next pointer */
 821                                 struct lock_list *ul_next = l_curr->next;
 822
 823                                 DEBUG(10,("delete case.\n" ));
 824
 825                                 DLIST_REMOVE(lhead, l_curr);
 826                                 if(lhead == NULL)
 827                                         break; /* No more list... */
 828
 829                                 l_curr = ul_next;
 830
 831                         } else if ( (l_curr->start >= lock->start) &&
 832                                                 (l_curr->start < lock->start + lock->size) &&
 833                                                 (l_curr->start + l_curr->size > lock->start + lock->size) ) {
 834
 835                                 /*
 836                                  * This unlock overlaps the existing lock range at the high end.
 837                                  * Truncate by moving start to existing range end and reducing size.
 838                                  */
 839 /*********************************************
 840                 +---------------+
 841                 |  l_curr       |
 842                 +---------------+
 843         +---------------+
 844         |    lock       |
 845         +---------------+
 846 BECOMES....
 847                         +-------+
 848                         | l_curr|
 849                         +-------+
 850 **********************************************/
 851
 852                                 l_curr->size = (l_curr->start + l_curr->size) - (lock->start + lock->size);
 853                                 l_curr->start = lock->start + lock->size;
 854
 855                                 DEBUG(10,("truncate high case: start=%.0f,size=%.0f\n",
 856                                                                 (double)l_curr->start, (double)l_curr->size ));
 857
 858                                 l_curr = l_curr->next;
 859
 860                         } else if ( (l_curr->start < lock->start) &&
 861                                                 (l_curr->start + l_curr->size > lock->start) &&
 862                                                 (l_curr->start + l_curr->size <= lock->start + lock->size) ) {
 863
 864                                 /*
 865                                  * This unlock overlaps the existing lock range at the low end.
 866                                  * Truncate by reducing size.
 867                                  */
 868 /*********************************************
 869    +---------------+
 870    |  l_curr       |
 871    +---------------+
 872            +---------------+
 873            |    lock       |
 874            +---------------+
 875 BECOMES....
 876    +-------+
 877    | l_curr|
 878    +-------+
 879 **********************************************/
 880
 881                                 l_curr->size = lock->start - l_curr->start;
 882
 883                                 DEBUG(10,("truncate low case: start=%.0f,size=%.0f\n",
 884                                                                 (double)l_curr->start, (double)l_curr->size ));
 885
 886                                 l_curr = l_curr->next;
 887
 888                         } else if ( (l_curr->start < lock->start) &&
 889                                                 (l_curr->start + l_curr->size > lock->start + lock->size) ) {
 890                                 /*
 891                                  * Worst case scenario. Unlock request completely overlaps an existing
 892                                  * lock range. Split the request into two, push the new (upper) request
 893                                  * into the dlink list, and continue with the entry after ul_new (as we
 894                                  * know that ul_new will not overlap with this lock).
 895                                  */
 896 /*********************************************
 897         +---------------------------+
 898         |        l_curr             |
 899         +---------------------------+
 900                 +---------+
 901                 | lock    |
 902                 +---------+
 903 BECOMES.....
 904         +-------+         +---------+
 905         | l_curr|         | l_new   |
 906         +-------+         +---------+
 907 **********************************************/
 908                                 struct lock_list *l_new = TALLOC_P(ctx, struct lock_list);
 909
 910                                 if(l_new == NULL) {
 911                                         DEBUG(0,("posix_lock_list: talloc fail.\n"));
 912                                         return NULL; /* The talloc_destroy takes care of cleanup. */
 913                                 }
 914
 915                                 ZERO_STRUCTP(l_new);
 916                                 l_new->start = lock->start + lock->size;
 917                                 l_new->size = l_curr->start + l_curr->size - l_new->start;
 918
 919                                 /* Truncate the l_curr. */
 920                                 l_curr->size = lock->start - l_curr->start;
 921
 922                                 DEBUG(10,("split case: curr: start=%.0f,size=%.0f \
 923 new: start=%.0f,size=%.0f\n", (double)l_curr->start, (double)l_curr->size,
 924                                                                 (double)l_new->start, (double)l_new->size ));
 925
 926                                 /*
 927                                  * Add into the dlink list after the l_curr point - NOT at lhead.
 928                                  * Note we can't use DLINK_ADD here as this inserts at the head of the given list.
 929                                  */
 930
 931                                 l_new->prev = l_curr;
 932                                 l_new->next = l_curr->next;
 933                                 l_curr->next = l_new;
 934
 935                                 /* And move after the link we added. */
 936                                 l_curr = l_new->next;
 937
 938                         } else {
 939
 940                                 /*
 941                                  * This logic case should never happen. Ensure this is the
 942                                  * case by forcing an abort.... Remove in production.
 943                                  */
 944                                 pstring msg;
 945
 946                                 slprintf(msg, sizeof(msg)-1, "logic flaw in cases: l_curr: start = %.0f, size = %.0f : \
 947 lock: start = %.0f, size = %.0f\n", (double)l_curr->start, (double)l_curr->size, (double)lock->start, (double)lock->size );
 948
 949                                 smb_panic(msg);
 950                         }
 951                 } /* end for ( l_curr = lhead; l_curr;) */
 952         } /* end for (i=0; i<num_locks && ul_head; i++) */
 953
 954         SAFE_FREE(dbuf.dptr);
 955
 956         return lhead;
 957 }
 958
 959 /****************************************************************************
 960  POSIX function to acquire a lock. Returns True if the
 961  lock could be granted, False if not.
 962 ****************************************************************************/
 963
 964 BOOL set_posix_lock(files_struct *fsp, SMB_BIG_UINT u_offset, SMB_BIG_UINT u_count, enum brl_type lock_type)
 965 {
 966         SMB_OFF_T offset;
 967         SMB_OFF_T count;
 968         BOOL ret = True;
 969         size_t entry_num = 0;
 970         size_t lock_count;
 971         TALLOC_CTX *l_ctx = NULL;
 972         struct lock_list *llist = NULL;
 973         struct lock_list *ll = NULL;
 974         int posix_lock_type = map_posix_lock_type(fsp,lock_type);
 975
 976         DEBUG(5,("set_posix_lock: File %s, offset = %.0f, count = %.0f, type = %s\n",
 977                         fsp->fsp_name, (double)u_offset, (double)u_count, posix_lock_type_name(lock_type) ));
 978
 979         /*
 980          * If the requested lock won't fit in the POSIX range, we will
 981          * pretend it was successful.
 982          */
 983
 984         if(!posix_lock_in_range(&offset, &count, u_offset, u_count))
 985                 return True;
 986
 987         /*
 988          * Windows is very strange. It allows read locks to be overlayed
 989          * (even over a write lock), but leaves the write lock in force until the first
 990          * unlock. It also reference counts the locks. This means the following sequence :
 991          *
 992          * process1                                      process2
 993          * ------------------------------------------------------------------------
 994          * WRITE LOCK : start = 2, len = 10
 995          *                                            READ LOCK: start =0, len = 10 - FAIL
 996          * READ LOCK : start = 0, len = 14
 997          *                                            READ LOCK: start =0, len = 10 - FAIL
 998          * UNLOCK : start = 2, len = 10
 999          *                                            READ LOCK: start =0, len = 10 - OK
1000          *
1001          * Under POSIX, the same sequence in steps 1 and 2 would not be reference counted, but
1002          * would leave a single read lock over the 0-14 region. In order to
1003          * re-create Windows semantics mapped to POSIX locks, we create multiple TDB
1004          * entries, one for each overlayed lock request. We are guarenteed by the brlock
1005          * semantics that if a write lock is added, then it will be first in the array.
1006          */
1007
1008         if ((l_ctx = talloc_init("set_posix_lock")) == NULL) {
1009                 DEBUG(0,("set_posix_lock: unable to init talloc context.\n"));
1010                 return True; /* Not a fatal error. */
1011         }
1012
1013         if ((ll = TALLOC_P(l_ctx, struct lock_list)) == NULL) {
1014                 DEBUG(0,("set_posix_lock: unable to talloc unlock list.\n"));
1015                 talloc_destroy(l_ctx);
1016                 return True; /* Not a fatal error. */
1017         }
1018
1019         /*
1020          * Create the initial list entry containing the
1021          * lock we want to add.
1022          */
1023
1024         ZERO_STRUCTP(ll);
1025         ll->start = offset;
1026         ll->size = count;
1027
1028         DLIST_ADD(llist, ll);
1029
1030         /*
1031          * The following call calculates if there are any
1032          * overlapping locks held by this process on
1033          * fd's open on the same file and splits this list
1034          * into a list of lock ranges that do not overlap with existing
1035          * POSIX locks.
1036          */
1037
1038         llist = posix_lock_list(l_ctx, llist, fsp);
1039
1040         /*
1041          * Now we have the list of ranges to lock it is safe to add the
1042          * entry into the POSIX lock tdb. We take note of the entry we
1043          * added here in case we have to remove it on POSIX lock fail.
1044          */
1045
1046         if (!add_posix_lock_entry(fsp,offset,count,posix_lock_type,&entry_num)) {
1047                 DEBUG(0,("set_posix_lock: Unable to create posix lock entry !\n"));
1048                 talloc_destroy(l_ctx);
1049                 return False;
1050         }
1051
1052         /*
1053          * Add the POSIX locks on the list of ranges returned.
1054          * As the lock is supposed to be added atomically, we need to
1055          * back out all the locks if any one of these calls fail.
1056          */
1057
1058         for (lock_count = 0, ll = llist; ll; ll = ll->next, lock_count++) {
1059                 offset = ll->start;
1060                 count = ll->size;
1061
1062                 DEBUG(5,("set_posix_lock: Real lock: Type = %s: offset = %.0f, count = %.0f\n",
1063                         posix_lock_type_name(posix_lock_type), (double)offset, (double)count ));
1064
1065                 if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,posix_lock_type)) {
1066                         DEBUG(5,("set_posix_lock: Lock fail !: Type = %s: offset = %.0f, count = %.0f. Errno = %s\n",
1067                                 posix_lock_type_name(posix_lock_type), (double)offset, (double)count, strerror(errno) ));
1068                         ret = False;
1069                         break;
1070                 }
1071         }
1072
1073         if (!ret) {
1074
1075                 /*
1076                  * Back out all the POSIX locks we have on fail.
1077                  */
1078
1079                 for (ll = llist; lock_count; ll = ll->next, lock_count--) {
1080                         offset = ll->start;
1081                         count = ll->size;
1082
1083                         DEBUG(5,("set_posix_lock: Backing out locks: Type = %s: offset = %.0f, count = %.0f\n",
1084                                 posix_lock_type_name(posix_lock_type), (double)offset, (double)count ));
1085
1086                         posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_UNLCK);
1087                 }
1088
1089                 /*
1090                  * Remove the tdb entry for this lock.
1091                  */
1092
1093                 delete_posix_lock_entry_by_index(fsp,entry_num);
1094         }
1095
1096         talloc_destroy(l_ctx);
1097         return ret;
1098 }
1099
1100 /****************************************************************************
1101  POSIX function to release a lock. Returns True if the
1102  lock could be released, False if not.
1103 ****************************************************************************/
1104
1105 BOOL release_posix_lock(files_struct *fsp, SMB_BIG_UINT u_offset, SMB_BIG_UINT u_count)
1106 {
1107         SMB_OFF_T offset;
1108         SMB_OFF_T count;
1109         BOOL ret = True;
1110         TALLOC_CTX *ul_ctx = NULL;
1111         struct lock_list *ulist = NULL;
1112         struct lock_list *ul = NULL;
1113         struct posix_lock deleted_lock;
1114         int num_overlapped_entries;
1115
1116         DEBUG(5,("release_posix_lock: File %s, offset = %.0f, count = %.0f\n",
1117                 fsp->fsp_name, (double)u_offset, (double)u_count ));
1118
1119         /*
1120          * If the requested lock won't fit in the POSIX range, we will
1121          * pretend it was successful.
1122          */
1123
1124         if(!posix_lock_in_range(&offset, &count, u_offset, u_count))
1125                 return True;
1126
1127         /*
1128          * We treat this as one unlock request for POSIX accounting purposes even
1129          * if it may later be split into multiple smaller POSIX unlock ranges.
1130          * num_overlapped_entries is the number of existing locks that have any
1131          * overlap with this unlock request.
1132          */
1133
1134         num_overlapped_entries = delete_posix_lock_entry(fsp, offset, count, &deleted_lock);
1135
1136         if (num_overlapped_entries == -1) {
1137                 smb_panic("release_posix_lock: unable find entry to delete !\n");
1138         }
1139
1140         /*
1141          * If num_overlapped_entries is > 0, and the lock_type we just deleted from the tdb was
1142          * a POSIX write lock, then before doing the unlock we need to downgrade
1143          * the POSIX lock to a read lock. This allows any overlapping read locks
1144          * to be atomically maintained.
1145          */
1146
1147         if (num_overlapped_entries > 0 && deleted_lock.lock_type == F_WRLCK) {
1148                 if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_RDLCK)) {
1149                         DEBUG(0,("release_posix_lock: downgrade of lock failed with error %s !\n", strerror(errno) ));
1150                         return False;
1151                 }
1152         }
1153
1154         if ((ul_ctx = talloc_init("release_posix_lock")) == NULL) {
1155                 DEBUG(0,("release_posix_lock: unable to init talloc context.\n"));
1156                 return True; /* Not a fatal error. */
1157         }
1158
1159         if ((ul = TALLOC_P(ul_ctx, struct lock_list)) == NULL) {
1160                 DEBUG(0,("release_posix_lock: unable to talloc unlock list.\n"));
1161                 talloc_destroy(ul_ctx);
1162                 return True; /* Not a fatal error. */
1163         }
1164
1165         /*
1166          * Create the initial list entry containing the
1167          * lock we want to remove.
1168          */
1169
1170         ZERO_STRUCTP(ul);
1171         ul->start = offset;
1172         ul->size = count;
1173
1174         DLIST_ADD(ulist, ul);
1175
1176         /*
1177          * The following call calculates if there are any
1178          * overlapping locks held by this process on
1179          * fd's open on the same file and creates a
1180          * list of unlock ranges that will allow
1181          * POSIX lock ranges to remain on the file whilst the
1182          * unlocks are performed.
1183          */
1184
1185         ulist = posix_lock_list(ul_ctx, ulist, fsp);
1186
1187         /*
1188          * Release the POSIX locks on the list of ranges returned.
1189          */
1190
1191         for(; ulist; ulist = ulist->next) {
1192                 offset = ulist->start;
1193                 count = ulist->size;
1194
1195                 DEBUG(5,("release_posix_lock: Real unlock: offset = %.0f, count = %.0f\n",
1196                         (double)offset, (double)count ));
1197
1198                 if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_UNLCK))
1199                         ret = False;
1200         }
1201
1202         talloc_destroy(ul_ctx);
1203
1204         return ret;
1205 }
1206
1207 /****************************************************************************
1208  Remove all lock entries for a specific dev/inode pair from the tdb.
1209 ****************************************************************************/
1210
1211 static void delete_posix_lock_entries(files_struct *fsp)
1212 {
1213         TDB_DATA kbuf = locking_key_fsp(fsp);
1214
1215         if (tdb_delete(posix_lock_tdb, kbuf) == -1)
1216                 DEBUG(0,("delete_close_entries: tdb_delete fail !\n"));
1217 }
1218
1219 /****************************************************************************
1220  Debug function.
1221 ****************************************************************************/
1222
1223 static void dump_entry(struct posix_lock *pl)
1224 {
1225         DEBUG(10,("entry: start=%.0f, size=%.0f, type=%d, fd=%i\n",
1226                 (double)pl->start, (double)pl->size, (int)pl->lock_type, pl->fd ));
1227 }
1228
1229 /****************************************************************************
1230  Remove any locks on this fd. Called from file_close().
1231 ****************************************************************************/
1232
1233 void posix_locking_close_file(files_struct *fsp)
1234 {
1235         struct posix_lock *entries = NULL;
1236         size_t count, i;
1237
1238         /*
1239          * Optimization for the common case where we are the only
1240          * opener of a file. If all fd entries are our own, we don't
1241          * need to explicitly release all the locks via the POSIX functions,
1242          * we can just remove all the entries in the tdb and allow the
1243          * close to remove the real locks.
1244          */
1245
1246         count = get_posix_lock_entries(fsp, &entries);
1247
1248         if (count == 0) {
1249                 DEBUG(10,("posix_locking_close_file: file %s has no outstanding locks.\n", fsp->fsp_name ));
1250                 return;
1251         }
1252
1253         for (i = 0; i < count; i++) {
1254                 if (entries[i].fd != fsp->fh->fd )
1255                         break;
1256
1257                 dump_entry(&entries[i]);
1258         }
1259
1260         if (i == count) {
1261                 /* All locks are ours. */
1262                 DEBUG(10,("posix_locking_close_file: file %s has %u outstanding locks, but all on one fd.\n",
1263                         fsp->fsp_name, (unsigned int)count ));
1264                 SAFE_FREE(entries);
1265                 delete_posix_lock_entries(fsp);
1266                 return;
1267         }
1268
1269         /*
1270          * Difficult case. We need to delete all our locks, whilst leaving
1271          * all other POSIX locks in place.
1272          */
1273
1274         for (i = 0; i < count; i++) {
1275                 struct posix_lock *pl = &entries[i];
1276                 if (pl->fd == fsp->fh->fd)
1277                         release_posix_lock(fsp, (SMB_BIG_UINT)pl->start, (SMB_BIG_UINT)pl->size );
1278         }
1279         SAFE_FREE(entries);
1280 }
1281
1282 /*******************************************************************
1283  Create the in-memory POSIX lock databases.
1284 ********************************************************************/
1285
1286 BOOL posix_locking_init(int read_only)
1287 {
1288         if (posix_lock_tdb && posix_pending_close_tdb)
1289                 return True;
1290
1291         if (!posix_lock_tdb)
1292                 posix_lock_tdb = tdb_open_log(NULL, 0, TDB_INTERNAL,
1293                                           read_only?O_RDONLY:(O_RDWR|O_CREAT), 0644);
1294         if (!posix_lock_tdb) {
1295                 DEBUG(0,("Failed to open POSIX byte range locking database.\n"));
1296                 return False;
1297         }
1298         if (!posix_pending_close_tdb)
1299                 posix_pending_close_tdb = tdb_open_log(NULL, 0, TDB_INTERNAL,
1300                                                    read_only?O_RDONLY:(O_RDWR|O_CREAT), 0644);
1301         if (!posix_pending_close_tdb) {
1302                 DEBUG(0,("Failed to open POSIX pending close database.\n"));
1303                 return False;
1304         }
1305
1306         return True;
1307 }
1308
1309 /*******************************************************************
1310  Delete the in-memory POSIX lock databases.
1311 ********************************************************************/
1312
1313 BOOL posix_locking_end(void)
1314 {
1315     if (posix_lock_tdb && tdb_close(posix_lock_tdb) != 0)
1316                 return False;
1317     if (posix_pending_close_tdb && tdb_close(posix_pending_close_tdb) != 0)
1318                 return False;
1319         return True;
1320 }