source3/locking/posix.c

   1 /*
   2    Unix SMB/CIFS implementation.
   3    Locking functions
   4    Copyright (C) Jeremy Allison 1992-2006
   5
   6    This program is free software; you can redistribute it and/or modify
   7    it under the terms of the GNU General Public License as published by
   8    the Free Software Foundation; either version 3 of the License, or
   9    (at your option) any later version.
  10
  11    This program is distributed in the hope that it will be useful,
  12    but WITHOUT ANY WARRANTY; without even the implied warranty of
  13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14    GNU General Public License for more details.
  15
  16    You should have received a copy of the GNU General Public License
  17    along with this program.  If not, see <http://www.gnu.org/licenses/>.
  18
  19    Revision History:
  20
  21    POSIX locking support. Jeremy Allison (jeremy@valinux.com), Apr. 2000.
  22 */
  23
  24 #include "includes.h"
  25 #include "system/filesys.h"
  26 #include "locking/proto.h"
  27 #include "dbwrap/dbwrap.h"
  28 #include "dbwrap/dbwrap_rbt.h"
  29 #include "util_tdb.h"
  30
  31 #undef DBGC_CLASS
  32 #define DBGC_CLASS DBGC_LOCKING
  33
  34 /*
  35  * The pending close database handle.
  36  */
  37
  38 static struct db_context *posix_pending_close_db;
  39
  40 /****************************************************************************
  41  First - the functions that deal with the underlying system locks - these
  42  functions are used no matter if we're mapping CIFS Windows locks or CIFS
  43  POSIX locks onto POSIX.
  44 ****************************************************************************/
  45
  46 /****************************************************************************
  47  Utility function to map a lock type correctly depending on the open
  48  mode of a file.
  49 ****************************************************************************/
  50
  51 static int map_posix_lock_type( files_struct *fsp, enum brl_type lock_type)
  52 {
  53         if((lock_type == WRITE_LOCK) && !fsp->can_write) {
  54                 /*
  55                  * Many UNIX's cannot get a write lock on a file opened read-only.
  56                  * Win32 locking semantics allow this.
  57                  * Do the best we can and attempt a read-only lock.
  58                  */
  59                 DEBUG(10,("map_posix_lock_type: Downgrading write lock to read due to read-only file.\n"));
  60                 return F_RDLCK;
  61         }
  62
  63         /*
  64          * This return should be the most normal, as we attempt
  65          * to always open files read/write.
  66          */
  67
  68         return (lock_type == READ_LOCK) ? F_RDLCK : F_WRLCK;
  69 }
  70
  71 /****************************************************************************
  72  Debugging aid :-).
  73 ****************************************************************************/
  74
  75 static const char *posix_lock_type_name(int lock_type)
  76 {
  77         return (lock_type == F_RDLCK) ? "READ" : "WRITE";
  78 }
  79
  80 /****************************************************************************
  81  Check to see if the given unsigned lock range is within the possible POSIX
  82  range. Modifies the given args to be in range if possible, just returns
  83  False if not.
  84 ****************************************************************************/
  85
  86 static bool posix_lock_in_range(off_t *offset_out, off_t *count_out,
  87                                 uint64_t u_offset, uint64_t u_count)
  88 {
  89         off_t offset = (off_t)u_offset;
  90         off_t count = (off_t)u_count;
  91
  92         /*
  93          * For the type of system we are, attempt to
  94          * find the maximum positive lock offset as an off_t.
  95          */
  96
  97 #if defined(MAX_POSITIVE_LOCK_OFFSET) /* Some systems have arbitrary limits. */
  98
  99         off_t max_positive_lock_offset = (MAX_POSITIVE_LOCK_OFFSET);
 100 #else
 101         /*
 102          * In this case off_t is 64 bits,
 103          * and the underlying system can handle 64 bit signed locks.
 104          */
 105
 106         off_t mask2 = ((off_t)0x4) << (SMB_OFF_T_BITS-4);
 107         off_t mask = (mask2<<1);
 108         off_t max_positive_lock_offset = ~mask;
 109
 110 #endif
 111         /*
 112          * POSIX locks of length zero mean lock to end-of-file.
 113          * Win32 locks of length zero are point probes. Ignore
 114          * any Win32 locks of length zero. JRA.
 115          */
 116
 117         if (count == 0) {
 118                 DEBUG(10,("posix_lock_in_range: count = 0, ignoring.\n"));
 119                 return False;
 120         }
 121
 122         /*
 123          * If the given offset was > max_positive_lock_offset then we cannot map this at all
 124          * ignore this lock.
 125          */
 126
 127         if (u_offset & ~((uint64_t)max_positive_lock_offset)) {
 128                 DEBUG(10, ("posix_lock_in_range: (offset = %ju) offset > %ju "
 129                            "and we cannot handle this. Ignoring lock.\n",
 130                            (uintmax_t)u_offset,
 131                            (uintmax_t)max_positive_lock_offset));
 132                 return False;
 133         }
 134
 135         /*
 136          * We must truncate the count to less than max_positive_lock_offset.
 137          */
 138
 139         if (u_count & ~((uint64_t)max_positive_lock_offset)) {
 140                 count = max_positive_lock_offset;
 141         }
 142
 143         /*
 144          * Truncate count to end at max lock offset.
 145          */
 146
 147         if (offset + count < 0 || offset + count > max_positive_lock_offset) {
 148                 count = max_positive_lock_offset - offset;
 149         }
 150
 151         /*
 152          * If we ate all the count, ignore this lock.
 153          */
 154
 155         if (count == 0) {
 156                 DEBUG(10, ("posix_lock_in_range: Count = 0. Ignoring lock "
 157                            "u_offset = %ju, u_count = %ju\n",
 158                            (uintmax_t)u_offset,
 159                            (uintmax_t)u_count));
 160                 return False;
 161         }
 162
 163         /*
 164          * The mapping was successful.
 165          */
 166
 167         DEBUG(10, ("posix_lock_in_range: offset_out = %ju, "
 168                    "count_out = %ju\n",
 169                    (uintmax_t)offset, (uintmax_t)count));
 170
 171         *offset_out = offset;
 172         *count_out = count;
 173
 174         return True;
 175 }
 176
 177 bool smb_vfs_call_lock(struct vfs_handle_struct *handle,
 178                        struct files_struct *fsp, int op, off_t offset,
 179                        off_t count, int type)
 180 {
 181         VFS_FIND(lock);
 182         return handle->fns->lock_fn(handle, fsp, op, offset, count, type);
 183 }
 184
 185 /****************************************************************************
 186  Actual function that does POSIX locks. Copes with 64 -> 32 bit cruft and
 187  broken NFS implementations.
 188 ****************************************************************************/
 189
 190 static bool posix_fcntl_lock(files_struct *fsp, int op, off_t offset, off_t count, int type)
 191 {
 192         bool ret;
 193
 194         DEBUG(8,("posix_fcntl_lock %d %d %jd %jd %d\n",
 195                  fsp->fh->fd,op,(intmax_t)offset,(intmax_t)count,type));
 196
 197         ret = SMB_VFS_LOCK(fsp, op, offset, count, type);
 198
 199         if (!ret && ((errno == EFBIG) || (errno == ENOLCK) || (errno ==  EINVAL))) {
 200
 201                 DEBUG(0, ("posix_fcntl_lock: WARNING: lock request at offset "
 202                           "%ju, length %ju returned\n",
 203                           (uintmax_t)offset, (uintmax_t)count));
 204                 DEBUGADD(0, ("an %s error. This can happen when using 64 bit "
 205                              "lock offsets\n", strerror(errno)));
 206                 DEBUGADD(0, ("on 32 bit NFS mounted file systems.\n"));
 207
 208                 /*
 209                  * If the offset is > 0x7FFFFFFF then this will cause problems on
 210                  * 32 bit NFS mounted filesystems. Just ignore it.
 211                  */
 212
 213                 if (offset & ~((off_t)0x7fffffff)) {
 214                         DEBUG(0,("Offset greater than 31 bits. Returning success.\n"));
 215                         return True;
 216                 }
 217
 218                 if (count & ~((off_t)0x7fffffff)) {
 219                         /* 32 bit NFS file system, retry with smaller offset */
 220                         DEBUG(0,("Count greater than 31 bits - retrying with 31 bit truncated length.\n"));
 221                         errno = 0;
 222                         count &= 0x7fffffff;
 223                         ret = SMB_VFS_LOCK(fsp, op, offset, count, type);
 224                 }
 225         }
 226
 227         DEBUG(8,("posix_fcntl_lock: Lock call %s\n", ret ? "successful" : "failed"));
 228         return ret;
 229 }
 230
 231 bool smb_vfs_call_getlock(struct vfs_handle_struct *handle,
 232                           struct files_struct *fsp, off_t *poffset,
 233                           off_t *pcount, int *ptype, pid_t *ppid)
 234 {
 235         VFS_FIND(getlock);
 236         return handle->fns->getlock_fn(handle, fsp, poffset, pcount, ptype,
 237                                        ppid);
 238 }
 239
 240 /****************************************************************************
 241  Actual function that gets POSIX locks. Copes with 64 -> 32 bit cruft and
 242  broken NFS implementations.
 243 ****************************************************************************/
 244
 245 static bool posix_fcntl_getlock(files_struct *fsp, off_t *poffset, off_t *pcount, int *ptype)
 246 {
 247         pid_t pid;
 248         bool ret;
 249
 250         DEBUG(8, ("posix_fcntl_getlock %d %ju %ju %d\n",
 251                   fsp->fh->fd, (uintmax_t)*poffset, (uintmax_t)*pcount,
 252                   *ptype));
 253
 254         ret = SMB_VFS_GETLOCK(fsp, poffset, pcount, ptype, &pid);
 255
 256         if (!ret && ((errno == EFBIG) || (errno == ENOLCK) || (errno ==  EINVAL))) {
 257
 258                 DEBUG(0, ("posix_fcntl_getlock: WARNING: lock request at "
 259                           "offset %ju, length %ju returned\n",
 260                           (uintmax_t)*poffset, (uintmax_t)*pcount));
 261                 DEBUGADD(0, ("an %s error. This can happen when using 64 bit "
 262                              "lock offsets\n", strerror(errno)));
 263                 DEBUGADD(0, ("on 32 bit NFS mounted file systems.\n"));
 264
 265                 /*
 266                  * If the offset is > 0x7FFFFFFF then this will cause problems on
 267                  * 32 bit NFS mounted filesystems. Just ignore it.
 268                  */
 269
 270                 if (*poffset & ~((off_t)0x7fffffff)) {
 271                         DEBUG(0,("Offset greater than 31 bits. Returning success.\n"));
 272                         return True;
 273                 }
 274
 275                 if (*pcount & ~((off_t)0x7fffffff)) {
 276                         /* 32 bit NFS file system, retry with smaller offset */
 277                         DEBUG(0,("Count greater than 31 bits - retrying with 31 bit truncated length.\n"));
 278                         errno = 0;
 279                         *pcount &= 0x7fffffff;
 280                         ret = SMB_VFS_GETLOCK(fsp,poffset,pcount,ptype,&pid);
 281                 }
 282         }
 283
 284         DEBUG(8,("posix_fcntl_getlock: Lock query call %s\n", ret ? "successful" : "failed"));
 285         return ret;
 286 }
 287
 288 /****************************************************************************
 289  POSIX function to see if a file region is locked. Returns True if the
 290  region is locked, False otherwise.
 291 ****************************************************************************/
 292
 293 bool is_posix_locked(files_struct *fsp,
 294                         uint64_t *pu_offset,
 295                         uint64_t *pu_count,
 296                         enum brl_type *plock_type,
 297                         enum brl_flavour lock_flav)
 298 {
 299         off_t offset;
 300         off_t count;
 301         int posix_lock_type = map_posix_lock_type(fsp,*plock_type);
 302
 303         DEBUG(10, ("is_posix_locked: File %s, offset = %ju, count = %ju, "
 304                    "type = %s\n", fsp_str_dbg(fsp), (uintmax_t)*pu_offset,
 305                    (uintmax_t)*pu_count,  posix_lock_type_name(*plock_type)));
 306
 307         /*
 308          * If the requested lock won't fit in the POSIX range, we will
 309          * never set it, so presume it is not locked.
 310          */
 311
 312         if(!posix_lock_in_range(&offset, &count, *pu_offset, *pu_count)) {
 313                 return False;
 314         }
 315
 316         if (!posix_fcntl_getlock(fsp,&offset,&count,&posix_lock_type)) {
 317                 return False;
 318         }
 319
 320         if (posix_lock_type == F_UNLCK) {
 321                 return False;
 322         }
 323
 324         if (lock_flav == POSIX_LOCK) {
 325                 /* Only POSIX lock queries need to know the details. */
 326                 *pu_offset = (uint64_t)offset;
 327                 *pu_count = (uint64_t)count;
 328                 *plock_type = (posix_lock_type == F_RDLCK) ? READ_LOCK : WRITE_LOCK;
 329         }
 330         return True;
 331 }
 332
 333 /****************************************************************************
 334  Next - the functions that deal with in memory database storing representations
 335  of either Windows CIFS locks or POSIX CIFS locks.
 336 ****************************************************************************/
 337
 338 /* The key used in the in-memory POSIX databases. */
 339
 340 struct lock_ref_count_key {
 341         struct file_id id;
 342         char r;
 343 };
 344
 345 /*******************************************************************
 346  Form a static locking key for a dev/inode pair for the lock ref count
 347 ******************************************************************/
 348
 349 static TDB_DATA locking_ref_count_key_fsp(files_struct *fsp,
 350                                           struct lock_ref_count_key *tmp)
 351 {
 352         ZERO_STRUCTP(tmp);
 353         tmp->id = fsp->file_id;
 354         tmp->r = 'r';
 355         return make_tdb_data((uint8_t *)tmp, sizeof(*tmp));
 356 }
 357
 358 /*******************************************************************
 359  Convenience function to get an fd_array key from an fsp.
 360 ******************************************************************/
 361
 362 static TDB_DATA fd_array_key_fsp(files_struct *fsp)
 363 {
 364         return make_tdb_data((uint8 *)&fsp->file_id, sizeof(fsp->file_id));
 365 }
 366
 367 /*******************************************************************
 368  Create the in-memory POSIX lock databases.
 369 ********************************************************************/
 370
 371 bool posix_locking_init(bool read_only)
 372 {
 373         if (posix_pending_close_db != NULL) {
 374                 return true;
 375         }
 376
 377         posix_pending_close_db = db_open_rbt(NULL);
 378
 379         if (posix_pending_close_db == NULL) {
 380                 DEBUG(0,("Failed to open POSIX pending close database.\n"));
 381                 return false;
 382         }
 383
 384         return true;
 385 }
 386
 387 /*******************************************************************
 388  Delete the in-memory POSIX lock databases.
 389 ********************************************************************/
 390
 391 bool posix_locking_end(void)
 392 {
 393         /*
 394          * Shouldn't we close all fd's here?
 395          */
 396         TALLOC_FREE(posix_pending_close_db);
 397         return true;
 398 }
 399
 400 /****************************************************************************
 401  Next - the functions that deal with storing fd's that have outstanding
 402  POSIX locks when closed.
 403 ****************************************************************************/
 404
 405 /****************************************************************************
 406  The records in posix_pending_close_db are composed of an array of
 407  ints keyed by dev/ino pair. Those ints are the fd's that were open on
 408  this dev/ino pair that should have been closed, but can't as the lock
 409  ref count is non zero.
 410 ****************************************************************************/
 411
 412 /****************************************************************************
 413  Keep a reference count of the number of Windows locks open on this dev/ino
 414  pair. Creates entry if it doesn't exist.
 415 ****************************************************************************/
 416
 417 static void increment_windows_lock_ref_count(files_struct *fsp)
 418 {
 419         struct lock_ref_count_key tmp;
 420         int32_t lock_ref_count = 0;
 421         NTSTATUS status;
 422
 423         status = dbwrap_change_int32_atomic(
 424                 posix_pending_close_db, locking_ref_count_key_fsp(fsp, &tmp),
 425                 &lock_ref_count, 1);
 426
 427         SMB_ASSERT(NT_STATUS_IS_OK(status));
 428         SMB_ASSERT(lock_ref_count < INT32_MAX);
 429
 430         DEBUG(10,("increment_windows_lock_ref_count for file now %s = %d\n",
 431                   fsp_str_dbg(fsp), (int)lock_ref_count));
 432 }
 433
 434 /****************************************************************************
 435  Bulk delete - subtract as many locks as we've just deleted.
 436 ****************************************************************************/
 437
 438 static void decrement_windows_lock_ref_count(files_struct *fsp)
 439 {
 440         struct lock_ref_count_key tmp;
 441         int32_t lock_ref_count = 0;
 442         NTSTATUS status;
 443
 444         status = dbwrap_change_int32_atomic(
 445                 posix_pending_close_db, locking_ref_count_key_fsp(fsp, &tmp),
 446                 &lock_ref_count, -1);
 447
 448         SMB_ASSERT(NT_STATUS_IS_OK(status));
 449         SMB_ASSERT(lock_ref_count >= 0);
 450
 451         DEBUG(10,("reduce_windows_lock_ref_count for file now %s = %d\n",
 452                   fsp_str_dbg(fsp), (int)lock_ref_count));
 453 }
 454
 455 /****************************************************************************
 456  Fetch the lock ref count.
 457 ****************************************************************************/
 458
 459 static int32_t get_windows_lock_ref_count(files_struct *fsp)
 460 {
 461         struct lock_ref_count_key tmp;
 462         NTSTATUS status;
 463         int32_t lock_ref_count = 0;
 464
 465         status = dbwrap_fetch_int32(
 466                 posix_pending_close_db, locking_ref_count_key_fsp(fsp, &tmp),
 467                 &lock_ref_count);
 468
 469         if (!NT_STATUS_IS_OK(status) &&
 470             !NT_STATUS_EQUAL(status, NT_STATUS_NOT_FOUND)) {
 471                 DEBUG(0, ("get_windows_lock_ref_count: Error fetching "
 472                           "lock ref count for file %s: %s\n",
 473                           fsp_str_dbg(fsp), nt_errstr(status)));
 474         }
 475         return lock_ref_count;
 476 }
 477
 478 /****************************************************************************
 479  Delete a lock_ref_count entry.
 480 ****************************************************************************/
 481
 482 static void delete_windows_lock_ref_count(files_struct *fsp)
 483 {
 484         struct lock_ref_count_key tmp;
 485
 486         /* Not a bug if it doesn't exist - no locks were ever granted. */
 487
 488         dbwrap_delete(posix_pending_close_db,
 489                       locking_ref_count_key_fsp(fsp, &tmp));
 490
 491         DEBUG(10,("delete_windows_lock_ref_count for file %s\n",
 492                   fsp_str_dbg(fsp)));
 493 }
 494
 495 /****************************************************************************
 496  Add an fd to the pending close tdb.
 497 ****************************************************************************/
 498
 499 static void add_fd_to_close_entry(files_struct *fsp)
 500 {
 501         struct db_record *rec;
 502         int *fds;
 503         size_t num_fds;
 504         NTSTATUS status;
 505         TDB_DATA value;
 506
 507         rec = dbwrap_fetch_locked(
 508                 posix_pending_close_db, talloc_tos(),
 509                 fd_array_key_fsp(fsp));
 510
 511         SMB_ASSERT(rec != NULL);
 512
 513         value = dbwrap_record_get_value(rec);
 514         SMB_ASSERT((value.dsize % sizeof(int)) == 0);
 515
 516         num_fds = value.dsize / sizeof(int);
 517         fds = talloc_array(rec, int, num_fds+1);
 518
 519         SMB_ASSERT(fds != NULL);
 520
 521         memcpy(fds, value.dptr, value.dsize);
 522         fds[num_fds] = fsp->fh->fd;
 523
 524         status = dbwrap_record_store(
 525                 rec, make_tdb_data((uint8_t *)fds, talloc_get_size(fds)), 0);
 526
 527         SMB_ASSERT(NT_STATUS_IS_OK(status));
 528
 529         TALLOC_FREE(rec);
 530
 531         DEBUG(10,("add_fd_to_close_entry: added fd %d file %s\n",
 532                   fsp->fh->fd, fsp_str_dbg(fsp)));
 533 }
 534
 535 /****************************************************************************
 536  Remove all fd entries for a specific dev/inode pair from the tdb.
 537 ****************************************************************************/
 538
 539 static void delete_close_entries(files_struct *fsp)
 540 {
 541         struct db_record *rec;
 542
 543         rec = dbwrap_fetch_locked(
 544                 posix_pending_close_db, talloc_tos(),
 545                 fd_array_key_fsp(fsp));
 546
 547         SMB_ASSERT(rec != NULL);
 548         dbwrap_record_delete(rec);
 549         TALLOC_FREE(rec);
 550 }
 551
 552 /****************************************************************************
 553  Get the array of POSIX pending close records for an open fsp. Returns number
 554  of entries.
 555 ****************************************************************************/
 556
 557 static size_t get_posix_pending_close_entries(TALLOC_CTX *mem_ctx,
 558                                               files_struct *fsp, int **entries)
 559 {
 560         TDB_DATA dbuf;
 561         NTSTATUS status;
 562
 563         status = dbwrap_fetch(
 564                 posix_pending_close_db, mem_ctx, fd_array_key_fsp(fsp),
 565                 &dbuf);
 566
 567         if (NT_STATUS_EQUAL(status, NT_STATUS_NOT_FOUND)) {
 568                 *entries = NULL;
 569                 return 0;
 570         }
 571
 572         SMB_ASSERT(NT_STATUS_IS_OK(status));
 573
 574         if (dbuf.dsize == 0) {
 575                 *entries = NULL;
 576                 return 0;
 577         }
 578
 579         *entries = (int *)dbuf.dptr;
 580         return (size_t)(dbuf.dsize / sizeof(int));
 581 }
 582
 583 /****************************************************************************
 584  Deal with pending closes needed by POSIX locking support.
 585  Note that posix_locking_close_file() is expected to have been called
 586  to delete all locks on this fsp before this function is called.
 587 ****************************************************************************/
 588
 589 int fd_close_posix(struct files_struct *fsp)
 590 {
 591         int saved_errno = 0;
 592         int ret;
 593         int *fd_array = NULL;
 594         size_t count, i;
 595
 596         if (!lp_locking(fsp->conn->params) ||
 597             !lp_posix_locking(fsp->conn->params))
 598         {
 599                 /*
 600                  * No locking or POSIX to worry about or we want POSIX semantics
 601                  * which will lose all locks on all fd's open on this dev/inode,
 602                  * just close.
 603                  */
 604                 return close(fsp->fh->fd);
 605         }
 606
 607         if (get_windows_lock_ref_count(fsp)) {
 608
 609                 /*
 610                  * There are outstanding locks on this dev/inode pair on
 611                  * other fds. Add our fd to the pending close tdb and set
 612                  * fsp->fh->fd to -1.
 613                  */
 614
 615                 add_fd_to_close_entry(fsp);
 616                 return 0;
 617         }
 618
 619         /*
 620          * No outstanding locks. Get the pending close fd's
 621          * from the tdb and close them all.
 622          */
 623
 624         count = get_posix_pending_close_entries(talloc_tos(), fsp, &fd_array);
 625
 626         if (count) {
 627                 DEBUG(10,("fd_close_posix: doing close on %u fd's.\n",
 628                           (unsigned int)count));
 629
 630                 for(i = 0; i < count; i++) {
 631                         if (close(fd_array[i]) == -1) {
 632                                 saved_errno = errno;
 633                         }
 634                 }
 635
 636                 /*
 637                  * Delete all fd's stored in the tdb
 638                  * for this dev/inode pair.
 639                  */
 640
 641                 delete_close_entries(fsp);
 642         }
 643
 644         TALLOC_FREE(fd_array);
 645
 646         /* Don't need a lock ref count on this dev/ino anymore. */
 647         delete_windows_lock_ref_count(fsp);
 648
 649         /*
 650          * Finally close the fd associated with this fsp.
 651          */
 652
 653         ret = close(fsp->fh->fd);
 654
 655         if (ret == 0 && saved_errno != 0) {
 656                 errno = saved_errno;
 657                 ret = -1;
 658         }
 659
 660         return ret;
 661 }
 662
 663 /****************************************************************************
 664  Next - the functions that deal with the mapping CIFS Windows locks onto
 665  the underlying system POSIX locks.
 666 ****************************************************************************/
 667
 668 /*
 669  * Structure used when splitting a lock range
 670  * into a POSIX lock range. Doubly linked list.
 671  */
 672
 673 struct lock_list {
 674         struct lock_list *next;
 675         struct lock_list *prev;
 676         off_t start;
 677         off_t size;
 678 };
 679
 680 /****************************************************************************
 681  Create a list of lock ranges that don't overlap a given range. Used in calculating
 682  POSIX locks and unlocks. This is a difficult function that requires ASCII art to
 683  understand it :-).
 684 ****************************************************************************/
 685
 686 static struct lock_list *posix_lock_list(TALLOC_CTX *ctx,
 687                                                 struct lock_list *lhead,
 688                                                 const struct lock_context *lock_ctx, /* Lock context lhead belongs to. */
 689                                                 const struct lock_struct *plocks,
 690                                                 int num_locks)
 691 {
 692         int i;
 693
 694         /*
 695          * Check the current lock list on this dev/inode pair.
 696          * Quit if the list is deleted.
 697          */
 698
 699         DEBUG(10, ("posix_lock_list: curr: start=%ju,size=%ju\n",
 700                    (uintmax_t)lhead->start, (uintmax_t)lhead->size ));
 701
 702         for (i=0; i<num_locks && lhead; i++) {
 703                 const struct lock_struct *lock = &plocks[i];
 704                 struct lock_list *l_curr;
 705
 706                 /* Ignore all but read/write locks. */
 707                 if (lock->lock_type != READ_LOCK && lock->lock_type != WRITE_LOCK) {
 708                         continue;
 709                 }
 710
 711                 /* Ignore locks not owned by this process. */
 712                 if (!serverid_equal(&lock->context.pid, &lock_ctx->pid)) {
 713                         continue;
 714                 }
 715
 716                 /*
 717                  * Walk the lock list, checking for overlaps. Note that
 718                  * the lock list can expand within this loop if the current
 719                  * range being examined needs to be split.
 720                  */
 721
 722                 for (l_curr = lhead; l_curr;) {
 723
 724                         DEBUG(10, ("posix_lock_list: lock: fnum=%ju: "
 725                                    "start=%ju,size=%ju:type=%s",
 726                                    (uintmax_t)lock->fnum,
 727                                    (uintmax_t)lock->start,
 728                                    (uintmax_t)lock->size,
 729                                    posix_lock_type_name(lock->lock_type) ));
 730
 731                         if ( (l_curr->start >= (lock->start + lock->size)) ||
 732                                  (lock->start >= (l_curr->start + l_curr->size))) {
 733
 734                                 /* No overlap with existing lock - leave this range alone. */
 735 /*********************************************
 736                                              +---------+
 737                                              | l_curr  |
 738                                              +---------+
 739                                 +-------+
 740                                 | lock  |
 741                                 +-------+
 742 OR....
 743              +---------+
 744              |  l_curr |
 745              +---------+
 746 **********************************************/
 747
 748                                 DEBUG(10,(" no overlap case.\n" ));
 749
 750                                 l_curr = l_curr->next;
 751
 752                         } else if ( (l_curr->start >= lock->start) &&
 753                                                 (l_curr->start + l_curr->size <= lock->start + lock->size) ) {
 754
 755                                 /*
 756                                  * This range is completely overlapped by this existing lock range
 757                                  * and thus should have no effect. Delete it from the list.
 758                                  */
 759 /*********************************************
 760                 +---------+
 761                 |  l_curr |
 762                 +---------+
 763         +---------------------------+
 764         |       lock                |
 765         +---------------------------+
 766 **********************************************/
 767                                 /* Save the next pointer */
 768                                 struct lock_list *ul_next = l_curr->next;
 769
 770                                 DEBUG(10,(" delete case.\n" ));
 771
 772                                 DLIST_REMOVE(lhead, l_curr);
 773                                 if(lhead == NULL) {
 774                                         break; /* No more list... */
 775                                 }
 776
 777                                 l_curr = ul_next;
 778
 779                         } else if ( (l_curr->start >= lock->start) &&
 780                                                 (l_curr->start < lock->start + lock->size) &&
 781                                                 (l_curr->start + l_curr->size > lock->start + lock->size) ) {
 782
 783                                 /*
 784                                  * This range overlaps the existing lock range at the high end.
 785                                  * Truncate by moving start to existing range end and reducing size.
 786                                  */
 787 /*********************************************
 788                 +---------------+
 789                 |  l_curr       |
 790                 +---------------+
 791         +---------------+
 792         |    lock       |
 793         +---------------+
 794 BECOMES....
 795                         +-------+
 796                         | l_curr|
 797                         +-------+
 798 **********************************************/
 799
 800                                 l_curr->size = (l_curr->start + l_curr->size) - (lock->start + lock->size);
 801                                 l_curr->start = lock->start + lock->size;
 802
 803                                 DEBUG(10, (" truncate high case: start=%ju,"
 804                                            "size=%ju\n",
 805                                            (uintmax_t)l_curr->start,
 806                                            (uintmax_t)l_curr->size ));
 807
 808                                 l_curr = l_curr->next;
 809
 810                         } else if ( (l_curr->start < lock->start) &&
 811                                                 (l_curr->start + l_curr->size > lock->start) &&
 812                                                 (l_curr->start + l_curr->size <= lock->start + lock->size) ) {
 813
 814                                 /*
 815                                  * This range overlaps the existing lock range at the low end.
 816                                  * Truncate by reducing size.
 817                                  */
 818 /*********************************************
 819    +---------------+
 820    |  l_curr       |
 821    +---------------+
 822            +---------------+
 823            |    lock       |
 824            +---------------+
 825 BECOMES....
 826    +-------+
 827    | l_curr|
 828    +-------+
 829 **********************************************/
 830
 831                                 l_curr->size = lock->start - l_curr->start;
 832
 833                                 DEBUG(10, (" truncate low case: start=%ju,"
 834                                            "size=%ju\n",
 835                                            (uintmax_t)l_curr->start,
 836                                            (uintmax_t)l_curr->size ));
 837
 838                                 l_curr = l_curr->next;
 839
 840                         } else if ( (l_curr->start < lock->start) &&
 841                                                 (l_curr->start + l_curr->size > lock->start + lock->size) ) {
 842                                 /*
 843                                  * Worst case scenario. Range completely overlaps an existing
 844                                  * lock range. Split the request into two, push the new (upper) request
 845                                  * into the dlink list, and continue with the entry after l_new (as we
 846                                  * know that l_new will not overlap with this lock).
 847                                  */
 848 /*********************************************
 849         +---------------------------+
 850         |        l_curr             |
 851         +---------------------------+
 852                 +---------+
 853                 | lock    |
 854                 +---------+
 855 BECOMES.....
 856         +-------+         +---------+
 857         | l_curr|         | l_new   |
 858         +-------+         +---------+
 859 **********************************************/
 860                                 struct lock_list *l_new = talloc(ctx, struct lock_list);
 861
 862                                 if(l_new == NULL) {
 863                                         DEBUG(0,("posix_lock_list: talloc fail.\n"));
 864                                         return NULL; /* The talloc_destroy takes care of cleanup. */
 865                                 }
 866
 867                                 ZERO_STRUCTP(l_new);
 868                                 l_new->start = lock->start + lock->size;
 869                                 l_new->size = l_curr->start + l_curr->size - l_new->start;
 870
 871                                 /* Truncate the l_curr. */
 872                                 l_curr->size = lock->start - l_curr->start;
 873
 874                                 DEBUG(10, (" split case: curr: start=%ju,"
 875                                            "size=%ju new: start=%ju,"
 876                                            "size=%ju\n",
 877                                            (uintmax_t)l_curr->start,
 878                                            (uintmax_t)l_curr->size,
 879                                            (uintmax_t)l_new->start,
 880                                            (uintmax_t)l_new->size ));
 881
 882                                 /*
 883                                  * Add into the dlink list after the l_curr point - NOT at lhead.
 884                                  */
 885                                 DLIST_ADD_AFTER(lhead, l_new, l_curr);
 886
 887                                 /* And move after the link we added. */
 888                                 l_curr = l_new->next;
 889
 890                         } else {
 891
 892                                 /*
 893                                  * This logic case should never happen. Ensure this is the
 894                                  * case by forcing an abort.... Remove in production.
 895                                  */
 896                                 char *msg = NULL;
 897
 898                                 if (asprintf(&msg, "logic flaw in cases: "
 899                                              "l_curr: start = %ju, "
 900                                              "size = %ju : lock: "
 901                                              "start = %ju, size = %ju",
 902                                              (uintmax_t)l_curr->start,
 903                                              (uintmax_t)l_curr->size,
 904                                              (uintmax_t)lock->start,
 905                                              (uintmax_t)lock->size ) != -1) {
 906                                         smb_panic(msg);
 907                                 } else {
 908                                         smb_panic("posix_lock_list");
 909                                 }
 910                         }
 911                 } /* end for ( l_curr = lhead; l_curr;) */
 912         } /* end for (i=0; i<num_locks && ul_head; i++) */
 913
 914         return lhead;
 915 }
 916
 917 /****************************************************************************
 918  POSIX function to acquire a lock. Returns True if the
 919  lock could be granted, False if not.
 920 ****************************************************************************/
 921
 922 bool set_posix_lock_windows_flavour(files_struct *fsp,
 923                         uint64_t u_offset,
 924                         uint64_t u_count,
 925                         enum brl_type lock_type,
 926                         const struct lock_context *lock_ctx,
 927                         const struct lock_struct *plocks,
 928                         int num_locks,
 929                         int *errno_ret)
 930 {
 931         off_t offset;
 932         off_t count;
 933         int posix_lock_type = map_posix_lock_type(fsp,lock_type);
 934         bool ret = True;
 935         size_t lock_count;
 936         TALLOC_CTX *l_ctx = NULL;
 937         struct lock_list *llist = NULL;
 938         struct lock_list *ll = NULL;
 939
 940         DEBUG(5, ("set_posix_lock_windows_flavour: File %s, offset = %ju, "
 941                   "count = %ju, type = %s\n", fsp_str_dbg(fsp),
 942                   (uintmax_t)u_offset, (uintmax_t)u_count,
 943                   posix_lock_type_name(lock_type)));
 944
 945         /*
 946          * If the requested lock won't fit in the POSIX range, we will
 947          * pretend it was successful.
 948          */
 949
 950         if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
 951                 increment_windows_lock_ref_count(fsp);
 952                 return True;
 953         }
 954
 955         /*
 956          * Windows is very strange. It allows read locks to be overlayed
 957          * (even over a write lock), but leaves the write lock in force until the first
 958          * unlock. It also reference counts the locks. This means the following sequence :
 959          *
 960          * process1                                      process2
 961          * ------------------------------------------------------------------------
 962          * WRITE LOCK : start = 2, len = 10
 963          *                                            READ LOCK: start =0, len = 10 - FAIL
 964          * READ LOCK : start = 0, len = 14
 965          *                                            READ LOCK: start =0, len = 10 - FAIL
 966          * UNLOCK : start = 2, len = 10
 967          *                                            READ LOCK: start =0, len = 10 - OK
 968          *
 969          * Under POSIX, the same sequence in steps 1 and 2 would not be reference counted, but
 970          * would leave a single read lock over the 0-14 region.
 971          */
 972
 973         if ((l_ctx = talloc_init("set_posix_lock")) == NULL) {
 974                 DEBUG(0,("set_posix_lock_windows_flavour: unable to init talloc context.\n"));
 975                 return False;
 976         }
 977
 978         if ((ll = talloc(l_ctx, struct lock_list)) == NULL) {
 979                 DEBUG(0,("set_posix_lock_windows_flavour: unable to talloc unlock list.\n"));
 980                 talloc_destroy(l_ctx);
 981                 return False;
 982         }
 983
 984         /*
 985          * Create the initial list entry containing the
 986          * lock we want to add.
 987          */
 988
 989         ZERO_STRUCTP(ll);
 990         ll->start = offset;
 991         ll->size = count;
 992
 993         DLIST_ADD(llist, ll);
 994
 995         /*
 996          * The following call calculates if there are any
 997          * overlapping locks held by this process on
 998          * fd's open on the same file and splits this list
 999          * into a list of lock ranges that do not overlap with existing
1000          * POSIX locks.
1001          */
1002
1003         llist = posix_lock_list(l_ctx,
1004                                 llist,
1005                                 lock_ctx, /* Lock context llist belongs to. */
1006                                 plocks,
1007                                 num_locks);
1008
1009         /*
1010          * Add the POSIX locks on the list of ranges returned.
1011          * As the lock is supposed to be added atomically, we need to
1012          * back out all the locks if any one of these calls fail.
1013          */
1014
1015         for (lock_count = 0, ll = llist; ll; ll = ll->next, lock_count++) {
1016                 offset = ll->start;
1017                 count = ll->size;
1018
1019                 DEBUG(5, ("set_posix_lock_windows_flavour: Real lock: "
1020                           "Type = %s: offset = %ju, count = %ju\n",
1021                           posix_lock_type_name(posix_lock_type),
1022                           (uintmax_t)offset, (uintmax_t)count ));
1023
1024                 if (!posix_fcntl_lock(fsp,F_SETLK,offset,count,posix_lock_type)) {
1025                         *errno_ret = errno;
1026                         DEBUG(5, ("set_posix_lock_windows_flavour: Lock "
1027                                   "fail !: Type = %s: offset = %ju, "
1028                                   "count = %ju. Errno = %s\n",
1029                                   posix_lock_type_name(posix_lock_type),
1030                                   (uintmax_t)offset, (uintmax_t)count,
1031                                   strerror(errno) ));
1032                         ret = False;
1033                         break;
1034                 }
1035         }
1036
1037         if (!ret) {
1038
1039                 /*
1040                  * Back out all the POSIX locks we have on fail.
1041                  */
1042
1043                 for (ll = llist; lock_count; ll = ll->next, lock_count--) {
1044                         offset = ll->start;
1045                         count = ll->size;
1046
1047                         DEBUG(5, ("set_posix_lock_windows_flavour: Backing "
1048                                   "out locks: Type = %s: offset = %ju, "
1049                                   "count = %ju\n",
1050                                   posix_lock_type_name(posix_lock_type),
1051                                   (uintmax_t)offset, (uintmax_t)count ));
1052
1053                         posix_fcntl_lock(fsp,F_SETLK,offset,count,F_UNLCK);
1054                 }
1055         } else {
1056                 /* Remember the number of Windows locks we have on this dev/ino pair. */
1057                 increment_windows_lock_ref_count(fsp);
1058         }
1059
1060         talloc_destroy(l_ctx);
1061         return ret;
1062 }
1063
1064 /****************************************************************************
1065  POSIX function to release a lock. Returns True if the
1066  lock could be released, False if not.
1067 ****************************************************************************/
1068
1069 bool release_posix_lock_windows_flavour(files_struct *fsp,
1070                                 uint64_t u_offset,
1071                                 uint64_t u_count,
1072                                 enum brl_type deleted_lock_type,
1073                                 const struct lock_context *lock_ctx,
1074                                 const struct lock_struct *plocks,
1075                                 int num_locks)
1076 {
1077         off_t offset;
1078         off_t count;
1079         bool ret = True;
1080         TALLOC_CTX *ul_ctx = NULL;
1081         struct lock_list *ulist = NULL;
1082         struct lock_list *ul = NULL;
1083
1084         DEBUG(5, ("release_posix_lock_windows_flavour: File %s, offset = %ju, "
1085                   "count = %ju\n", fsp_str_dbg(fsp),
1086                   (uintmax_t)u_offset, (uintmax_t)u_count));
1087
1088         /* Remember the number of Windows locks we have on this dev/ino pair. */
1089         decrement_windows_lock_ref_count(fsp);
1090
1091         /*
1092          * If the requested lock won't fit in the POSIX range, we will
1093          * pretend it was successful.
1094          */
1095
1096         if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
1097                 return True;
1098         }
1099
1100         if ((ul_ctx = talloc_init("release_posix_lock")) == NULL) {
1101                 DEBUG(0,("release_posix_lock_windows_flavour: unable to init talloc context.\n"));
1102                 return False;
1103         }
1104
1105         if ((ul = talloc(ul_ctx, struct lock_list)) == NULL) {
1106                 DEBUG(0,("release_posix_lock_windows_flavour: unable to talloc unlock list.\n"));
1107                 talloc_destroy(ul_ctx);
1108                 return False;
1109         }
1110
1111         /*
1112          * Create the initial list entry containing the
1113          * lock we want to remove.
1114          */
1115
1116         ZERO_STRUCTP(ul);
1117         ul->start = offset;
1118         ul->size = count;
1119
1120         DLIST_ADD(ulist, ul);
1121
1122         /*
1123          * The following call calculates if there are any
1124          * overlapping locks held by this process on
1125          * fd's open on the same file and creates a
1126          * list of unlock ranges that will allow
1127          * POSIX lock ranges to remain on the file whilst the
1128          * unlocks are performed.
1129          */
1130
1131         ulist = posix_lock_list(ul_ctx,
1132                                 ulist,
1133                                 lock_ctx, /* Lock context ulist belongs to. */
1134                                 plocks,
1135                                 num_locks);
1136
1137         /*
1138          * If there were any overlapped entries (list is > 1 or size or start have changed),
1139          * and the lock_type we just deleted from
1140          * the upper layer tdb was a write lock, then before doing the unlock we need to downgrade
1141          * the POSIX lock to a read lock. This allows any overlapping read locks
1142          * to be atomically maintained.
1143          */
1144
1145         if (deleted_lock_type == WRITE_LOCK &&
1146                         (!ulist || ulist->next != NULL || ulist->start != offset || ulist->size != count)) {
1147
1148                 DEBUG(5, ("release_posix_lock_windows_flavour: downgrading "
1149                           "lock to READ: offset = %ju, count = %ju\n",
1150                           (uintmax_t)offset, (uintmax_t)count ));
1151
1152                 if (!posix_fcntl_lock(fsp,F_SETLK,offset,count,F_RDLCK)) {
1153                         DEBUG(0,("release_posix_lock_windows_flavour: downgrade of lock failed with error %s !\n", strerror(errno) ));
1154                         talloc_destroy(ul_ctx);
1155                         return False;
1156                 }
1157         }
1158
1159         /*
1160          * Release the POSIX locks on the list of ranges returned.
1161          */
1162
1163         for(; ulist; ulist = ulist->next) {
1164                 offset = ulist->start;
1165                 count = ulist->size;
1166
1167                 DEBUG(5, ("release_posix_lock_windows_flavour: Real unlock: "
1168                           "offset = %ju, count = %ju\n",
1169                           (uintmax_t)offset, (uintmax_t)count ));
1170
1171                 if (!posix_fcntl_lock(fsp,F_SETLK,offset,count,F_UNLCK)) {
1172                         ret = False;
1173                 }
1174         }
1175
1176         talloc_destroy(ul_ctx);
1177         return ret;
1178 }
1179
1180 /****************************************************************************
1181  Next - the functions that deal with mapping CIFS POSIX locks onto
1182  the underlying system POSIX locks.
1183 ****************************************************************************/
1184
1185 /****************************************************************************
1186  POSIX function to acquire a lock. Returns True if the
1187  lock could be granted, False if not.
1188  As POSIX locks don't stack or conflict (they just overwrite)
1189  we can map the requested lock directly onto a system one. We
1190  know it doesn't conflict with locks on other contexts as the
1191  upper layer would have refused it.
1192 ****************************************************************************/
1193
1194 bool set_posix_lock_posix_flavour(files_struct *fsp,
1195                         uint64_t u_offset,
1196                         uint64_t u_count,
1197                         enum brl_type lock_type,
1198                         int *errno_ret)
1199 {
1200         off_t offset;
1201         off_t count;
1202         int posix_lock_type = map_posix_lock_type(fsp,lock_type);
1203
1204         DEBUG(5,("set_posix_lock_posix_flavour: File %s, offset = %ju, count "
1205                  "= %ju, type = %s\n", fsp_str_dbg(fsp),
1206                  (uintmax_t)u_offset, (uintmax_t)u_count,
1207                  posix_lock_type_name(lock_type)));
1208
1209         /*
1210          * If the requested lock won't fit in the POSIX range, we will
1211          * pretend it was successful.
1212          */
1213
1214         if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
1215                 return True;
1216         }
1217
1218         if (!posix_fcntl_lock(fsp,F_SETLK,offset,count,posix_lock_type)) {
1219                 *errno_ret = errno;
1220                 DEBUG(5,("set_posix_lock_posix_flavour: Lock fail !: Type = %s: offset = %ju, count = %ju. Errno = %s\n",
1221                         posix_lock_type_name(posix_lock_type), (intmax_t)offset, (intmax_t)count, strerror(errno) ));
1222                 return False;
1223         }
1224         return True;
1225 }
1226
1227 /****************************************************************************
1228  POSIX function to release a lock. Returns True if the
1229  lock could be released, False if not.
1230  We are given a complete lock state from the upper layer which is what the lock
1231  state should be after the unlock has already been done, so what
1232  we do is punch out holes in the unlock range where locks owned by this process
1233  have a different lock context.
1234 ****************************************************************************/
1235
1236 bool release_posix_lock_posix_flavour(files_struct *fsp,
1237                                 uint64_t u_offset,
1238                                 uint64_t u_count,
1239                                 const struct lock_context *lock_ctx,
1240                                 const struct lock_struct *plocks,
1241                                 int num_locks)
1242 {
1243         bool ret = True;
1244         off_t offset;
1245         off_t count;
1246         TALLOC_CTX *ul_ctx = NULL;
1247         struct lock_list *ulist = NULL;
1248         struct lock_list *ul = NULL;
1249
1250         DEBUG(5, ("release_posix_lock_posix_flavour: File %s, offset = %ju, "
1251                   "count = %ju\n", fsp_str_dbg(fsp),
1252                   (uintmax_t)u_offset, (uintmax_t)u_count));
1253
1254         /*
1255          * If the requested lock won't fit in the POSIX range, we will
1256          * pretend it was successful.
1257          */
1258
1259         if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
1260                 return True;
1261         }
1262
1263         if ((ul_ctx = talloc_init("release_posix_lock")) == NULL) {
1264                 DEBUG(0,("release_posix_lock_windows_flavour: unable to init talloc context.\n"));
1265                 return False;
1266         }
1267
1268         if ((ul = talloc(ul_ctx, struct lock_list)) == NULL) {
1269                 DEBUG(0,("release_posix_lock_windows_flavour: unable to talloc unlock list.\n"));
1270                 talloc_destroy(ul_ctx);
1271                 return False;
1272         }
1273
1274         /*
1275          * Create the initial list entry containing the
1276          * lock we want to remove.
1277          */
1278
1279         ZERO_STRUCTP(ul);
1280         ul->start = offset;
1281         ul->size = count;
1282
1283         DLIST_ADD(ulist, ul);
1284
1285         /*
1286          * Walk the given array creating a linked list
1287          * of unlock requests.
1288          */
1289
1290         ulist = posix_lock_list(ul_ctx,
1291                                 ulist,
1292                                 lock_ctx, /* Lock context ulist belongs to. */
1293                                 plocks,
1294                                 num_locks);
1295
1296         /*
1297          * Release the POSIX locks on the list of ranges returned.
1298          */
1299
1300         for(; ulist; ulist = ulist->next) {
1301                 offset = ulist->start;
1302                 count = ulist->size;
1303
1304                 DEBUG(5, ("release_posix_lock_posix_flavour: Real unlock: "
1305                           "offset = %ju, count = %ju\n",
1306                           (uintmax_t)offset, (uintmax_t)count ));
1307
1308                 if (!posix_fcntl_lock(fsp,F_SETLK,offset,count,F_UNLCK)) {
1309                         ret = False;
1310                 }
1311         }
1312
1313         talloc_destroy(ul_ctx);
1314         return ret;
1315 }