source3/locking/posix.c

   1 /*
   2    Unix SMB/CIFS implementation.
   3    Locking functions
   4    Copyright (C) Jeremy Allison 1992-2006
   5
   6    This program is free software; you can redistribute it and/or modify
   7    it under the terms of the GNU General Public License as published by
   8    the Free Software Foundation; either version 3 of the License, or
   9    (at your option) any later version.
  10
  11    This program is distributed in the hope that it will be useful,
  12    but WITHOUT ANY WARRANTY; without even the implied warranty of
  13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14    GNU General Public License for more details.
  15
  16    You should have received a copy of the GNU General Public License
  17    along with this program.  If not, see <http://www.gnu.org/licenses/>.
  18
  19    Revision History:
  20
  21    POSIX locking support. Jeremy Allison (jeremy@valinux.com), Apr. 2000.
  22 */
  23
  24 #include "includes.h"
  25 #include "system/filesys.h"
  26 #include "locking/proto.h"
  27 #include "dbwrap/dbwrap.h"
  28 #include "dbwrap/dbwrap_rbt.h"
  29 #include "util_tdb.h"
  30
  31 #undef DBGC_CLASS
  32 #define DBGC_CLASS DBGC_LOCKING
  33
  34 /*
  35  * The pending close database handle.
  36  */
  37
  38 static struct db_context *posix_pending_close_db;
  39
  40 /****************************************************************************
  41  First - the functions that deal with the underlying system locks - these
  42  functions are used no matter if we're mapping CIFS Windows locks or CIFS
  43  POSIX locks onto POSIX.
  44 ****************************************************************************/
  45
  46 /****************************************************************************
  47  Utility function to map a lock type correctly depending on the open
  48  mode of a file.
  49 ****************************************************************************/
  50
  51 static int map_posix_lock_type( files_struct *fsp, enum brl_type lock_type)
  52 {
  53         if((lock_type == WRITE_LOCK) && !fsp->can_write) {
  54                 /*
  55                  * Many UNIX's cannot get a write lock on a file opened read-only.
  56                  * Win32 locking semantics allow this.
  57                  * Do the best we can and attempt a read-only lock.
  58                  */
  59                 DEBUG(10,("map_posix_lock_type: Downgrading write lock to read due to read-only file.\n"));
  60                 return F_RDLCK;
  61         }
  62
  63         /*
  64          * This return should be the most normal, as we attempt
  65          * to always open files read/write.
  66          */
  67
  68         return (lock_type == READ_LOCK) ? F_RDLCK : F_WRLCK;
  69 }
  70
  71 /****************************************************************************
  72  Debugging aid :-).
  73 ****************************************************************************/
  74
  75 static const char *posix_lock_type_name(int lock_type)
  76 {
  77         return (lock_type == F_RDLCK) ? "READ" : "WRITE";
  78 }
  79
  80 /****************************************************************************
  81  Check to see if the given unsigned lock range is within the possible POSIX
  82  range. Modifies the given args to be in range if possible, just returns
  83  False if not.
  84 ****************************************************************************/
  85
  86 static bool posix_lock_in_range(off_t *offset_out, off_t *count_out,
  87                                 uint64_t u_offset, uint64_t u_count)
  88 {
  89         off_t offset = (off_t)u_offset;
  90         off_t count = (off_t)u_count;
  91
  92         /*
  93          * For the type of system we are, attempt to
  94          * find the maximum positive lock offset as an off_t.
  95          */
  96
  97 #if defined(MAX_POSITIVE_LOCK_OFFSET) /* Some systems have arbitrary limits. */
  98
  99         off_t max_positive_lock_offset = (MAX_POSITIVE_LOCK_OFFSET);
 100 #else
 101         /*
 102          * In this case off_t is 64 bits,
 103          * and the underlying system can handle 64 bit signed locks.
 104          */
 105
 106         off_t mask2 = ((off_t)0x4) << (SMB_OFF_T_BITS-4);
 107         off_t mask = (mask2<<1);
 108         off_t max_positive_lock_offset = ~mask;
 109
 110 #endif
 111         /*
 112          * POSIX locks of length zero mean lock to end-of-file.
 113          * Win32 locks of length zero are point probes. Ignore
 114          * any Win32 locks of length zero. JRA.
 115          */
 116
 117         if (count == (off_t)0) {
 118                 DEBUG(10,("posix_lock_in_range: count = 0, ignoring.\n"));
 119                 return False;
 120         }
 121
 122         /*
 123          * If the given offset was > max_positive_lock_offset then we cannot map this at all
 124          * ignore this lock.
 125          */
 126
 127         if (u_offset & ~((uint64_t)max_positive_lock_offset)) {
 128                 DEBUG(10, ("posix_lock_in_range: (offset = %ju) offset > %ju "
 129                            "and we cannot handle this. Ignoring lock.\n",
 130                            (uintmax_t)u_offset,
 131                            (uintmax_t)max_positive_lock_offset));
 132                 return False;
 133         }
 134
 135         /*
 136          * We must truncate the count to less than max_positive_lock_offset.
 137          */
 138
 139         if (u_count & ~((uint64_t)max_positive_lock_offset)) {
 140                 count = max_positive_lock_offset;
 141         }
 142
 143         /*
 144          * Truncate count to end at max lock offset.
 145          */
 146
 147         if (offset + count < 0 || offset + count > max_positive_lock_offset) {
 148                 count = max_positive_lock_offset - offset;
 149         }
 150
 151         /*
 152          * If we ate all the count, ignore this lock.
 153          */
 154
 155         if (count == 0) {
 156                 DEBUG(10, ("posix_lock_in_range: Count = 0. Ignoring lock "
 157                            "u_offset = %ju, u_count = %ju\n",
 158                            (uintmax_t)u_offset,
 159                            (uintmax_t)u_count));
 160                 return False;
 161         }
 162
 163         /*
 164          * The mapping was successful.
 165          */
 166
 167         DEBUG(10, ("posix_lock_in_range: offset_out = %ju, "
 168                    "count_out = %ju\n",
 169                    (uintmax_t)offset, (uintmax_t)count));
 170
 171         *offset_out = offset;
 172         *count_out = count;
 173
 174         return True;
 175 }
 176
 177 bool smb_vfs_call_lock(struct vfs_handle_struct *handle,
 178                        struct files_struct *fsp, int op, off_t offset,
 179                        off_t count, int type)
 180 {
 181         VFS_FIND(lock);
 182         return handle->fns->lock_fn(handle, fsp, op, offset, count, type);
 183 }
 184
 185 /****************************************************************************
 186  Actual function that does POSIX locks. Copes with 64 -> 32 bit cruft and
 187  broken NFS implementations.
 188 ****************************************************************************/
 189
 190 static bool posix_fcntl_lock(files_struct *fsp, int op, off_t offset, off_t count, int type)
 191 {
 192         bool ret;
 193
 194         DEBUG(8,("posix_fcntl_lock %d %d %jd %jd %d\n",
 195                  fsp->fh->fd,op,(intmax_t)offset,(intmax_t)count,type));
 196
 197         ret = SMB_VFS_LOCK(fsp, op, offset, count, type);
 198
 199         if (!ret && ((errno == EFBIG) || (errno == ENOLCK) || (errno ==  EINVAL))) {
 200
 201                 DEBUG(0, ("posix_fcntl_lock: WARNING: lock request at offset "
 202                           "%ju, length %ju returned\n",
 203                           (uintmax_t)offset, (uintmax_t)count));
 204                 DEBUGADD(0, ("an %s error. This can happen when using 64 bit "
 205                              "lock offsets\n", strerror(errno)));
 206                 DEBUGADD(0, ("on 32 bit NFS mounted file systems.\n"));
 207
 208                 /*
 209                  * If the offset is > 0x7FFFFFFF then this will cause problems on
 210                  * 32 bit NFS mounted filesystems. Just ignore it.
 211                  */
 212
 213                 if (offset & ~((off_t)0x7fffffff)) {
 214                         DEBUG(0,("Offset greater than 31 bits. Returning success.\n"));
 215                         return True;
 216                 }
 217
 218                 if (count & ~((off_t)0x7fffffff)) {
 219                         /* 32 bit NFS file system, retry with smaller offset */
 220                         DEBUG(0,("Count greater than 31 bits - retrying with 31 bit truncated length.\n"));
 221                         errno = 0;
 222                         count &= 0x7fffffff;
 223                         ret = SMB_VFS_LOCK(fsp, op, offset, count, type);
 224                 }
 225         }
 226
 227         DEBUG(8,("posix_fcntl_lock: Lock call %s\n", ret ? "successful" : "failed"));
 228         return ret;
 229 }
 230
 231 bool smb_vfs_call_getlock(struct vfs_handle_struct *handle,
 232                           struct files_struct *fsp, off_t *poffset,
 233                           off_t *pcount, int *ptype, pid_t *ppid)
 234 {
 235         VFS_FIND(getlock);
 236         return handle->fns->getlock_fn(handle, fsp, poffset, pcount, ptype,
 237                                        ppid);
 238 }
 239
 240 /****************************************************************************
 241  Actual function that gets POSIX locks. Copes with 64 -> 32 bit cruft and
 242  broken NFS implementations.
 243 ****************************************************************************/
 244
 245 static bool posix_fcntl_getlock(files_struct *fsp, off_t *poffset, off_t *pcount, int *ptype)
 246 {
 247         pid_t pid;
 248         bool ret;
 249
 250         DEBUG(8, ("posix_fcntl_getlock %d %ju %ju %d\n",
 251                   fsp->fh->fd, (uintmax_t)*poffset, (uintmax_t)*pcount,
 252                   *ptype));
 253
 254         ret = SMB_VFS_GETLOCK(fsp, poffset, pcount, ptype, &pid);
 255
 256         if (!ret && ((errno == EFBIG) || (errno == ENOLCK) || (errno ==  EINVAL))) {
 257
 258                 DEBUG(0, ("posix_fcntl_getlock: WARNING: lock request at "
 259                           "offset %ju, length %ju returned\n",
 260                           (uintmax_t)*poffset, (uintmax_t)*pcount));
 261                 DEBUGADD(0, ("an %s error. This can happen when using 64 bit "
 262                              "lock offsets\n", strerror(errno)));
 263                 DEBUGADD(0, ("on 32 bit NFS mounted file systems.\n"));
 264
 265                 /*
 266                  * If the offset is > 0x7FFFFFFF then this will cause problems on
 267                  * 32 bit NFS mounted filesystems. Just ignore it.
 268                  */
 269
 270                 if (*poffset & ~((off_t)0x7fffffff)) {
 271                         DEBUG(0,("Offset greater than 31 bits. Returning success.\n"));
 272                         return True;
 273                 }
 274
 275                 if (*pcount & ~((off_t)0x7fffffff)) {
 276                         /* 32 bit NFS file system, retry with smaller offset */
 277                         DEBUG(0,("Count greater than 31 bits - retrying with 31 bit truncated length.\n"));
 278                         errno = 0;
 279                         *pcount &= 0x7fffffff;
 280                         ret = SMB_VFS_GETLOCK(fsp,poffset,pcount,ptype,&pid);
 281                 }
 282         }
 283
 284         DEBUG(8,("posix_fcntl_getlock: Lock query call %s\n", ret ? "successful" : "failed"));
 285         return ret;
 286 }
 287
 288 /****************************************************************************
 289  POSIX function to see if a file region is locked. Returns True if the
 290  region is locked, False otherwise.
 291 ****************************************************************************/
 292
 293 bool is_posix_locked(files_struct *fsp,
 294                         uint64_t *pu_offset,
 295                         uint64_t *pu_count,
 296                         enum brl_type *plock_type,
 297                         enum brl_flavour lock_flav)
 298 {
 299         off_t offset;
 300         off_t count;
 301         int posix_lock_type = map_posix_lock_type(fsp,*plock_type);
 302
 303         DEBUG(10, ("is_posix_locked: File %s, offset = %ju, count = %ju, "
 304                    "type = %s\n", fsp_str_dbg(fsp), (uintmax_t)*pu_offset,
 305                    (uintmax_t)*pu_count,  posix_lock_type_name(*plock_type)));
 306
 307         /*
 308          * If the requested lock won't fit in the POSIX range, we will
 309          * never set it, so presume it is not locked.
 310          */
 311
 312         if(!posix_lock_in_range(&offset, &count, *pu_offset, *pu_count)) {
 313                 return False;
 314         }
 315
 316         if (!posix_fcntl_getlock(fsp,&offset,&count,&posix_lock_type)) {
 317                 return False;
 318         }
 319
 320         if (posix_lock_type == F_UNLCK) {
 321                 return False;
 322         }
 323
 324         if (lock_flav == POSIX_LOCK) {
 325                 /* Only POSIX lock queries need to know the details. */
 326                 *pu_offset = (uint64_t)offset;
 327                 *pu_count = (uint64_t)count;
 328                 *plock_type = (posix_lock_type == F_RDLCK) ? READ_LOCK : WRITE_LOCK;
 329         }
 330         return True;
 331 }
 332
 333 /****************************************************************************
 334  Next - the functions that deal with in memory database storing representations
 335  of either Windows CIFS locks or POSIX CIFS locks.
 336 ****************************************************************************/
 337
 338 /* The key used in the in-memory POSIX databases. */
 339
 340 struct lock_ref_count_key {
 341         struct file_id id;
 342         char r;
 343 };
 344
 345 /*******************************************************************
 346  Form a static locking key for a dev/inode pair for the lock ref count
 347 ******************************************************************/
 348
 349 static TDB_DATA locking_ref_count_key_fsp(files_struct *fsp,
 350                                           struct lock_ref_count_key *tmp)
 351 {
 352         ZERO_STRUCTP(tmp);
 353         tmp->id = fsp->file_id;
 354         tmp->r = 'r';
 355         return make_tdb_data((uint8_t *)tmp, sizeof(*tmp));
 356 }
 357
 358 /*******************************************************************
 359  Convenience function to get an fd_array key from an fsp.
 360 ******************************************************************/
 361
 362 static TDB_DATA fd_array_key_fsp(files_struct *fsp)
 363 {
 364         return make_tdb_data((uint8 *)&fsp->file_id, sizeof(fsp->file_id));
 365 }
 366
 367 /*******************************************************************
 368  Create the in-memory POSIX lock databases.
 369 ********************************************************************/
 370
 371 bool posix_locking_init(bool read_only)
 372 {
 373         if (posix_pending_close_db != NULL) {
 374                 return true;
 375         }
 376
 377         posix_pending_close_db = db_open_rbt(NULL);
 378
 379         if (posix_pending_close_db == NULL) {
 380                 DEBUG(0,("Failed to open POSIX pending close database.\n"));
 381                 return false;
 382         }
 383
 384         return true;
 385 }
 386
 387 /*******************************************************************
 388  Delete the in-memory POSIX lock databases.
 389 ********************************************************************/
 390
 391 bool posix_locking_end(void)
 392 {
 393         /*
 394          * Shouldn't we close all fd's here?
 395          */
 396         TALLOC_FREE(posix_pending_close_db);
 397         return true;
 398 }
 399
 400 /****************************************************************************
 401  Next - the functions that deal with storing fd's that have outstanding
 402  POSIX locks when closed.
 403 ****************************************************************************/
 404
 405 /****************************************************************************
 406  The records in posix_pending_close_db are composed of an array of
 407  ints keyed by dev/ino pair. Those ints are the fd's that were open on
 408  this dev/ino pair that should have been closed, but can't as the lock
 409  ref count is non zero.
 410 ****************************************************************************/
 411
 412 /****************************************************************************
 413  Keep a reference count of the number of Windows locks open on this dev/ino
 414  pair. Creates entry if it doesn't exist.
 415 ****************************************************************************/
 416
 417 static void increment_windows_lock_ref_count(files_struct *fsp)
 418 {
 419         struct lock_ref_count_key tmp;
 420         int32_t lock_ref_count = 0;
 421         NTSTATUS status;
 422
 423         status = dbwrap_change_int32_atomic(
 424                 posix_pending_close_db, locking_ref_count_key_fsp(fsp, &tmp),
 425                 &lock_ref_count, 1);
 426
 427         SMB_ASSERT(NT_STATUS_IS_OK(status));
 428         SMB_ASSERT(lock_ref_count < INT32_MAX);
 429
 430         DEBUG(10,("increment_windows_lock_ref_count for file now %s = %d\n",
 431                   fsp_str_dbg(fsp), (int)lock_ref_count));
 432 }
 433
 434 /****************************************************************************
 435  Bulk delete - subtract as many locks as we've just deleted.
 436 ****************************************************************************/
 437
 438 static void decrement_windows_lock_ref_count(files_struct *fsp)
 439 {
 440         struct lock_ref_count_key tmp;
 441         int32_t lock_ref_count = 0;
 442         NTSTATUS status;
 443
 444         status = dbwrap_change_int32_atomic(
 445                 posix_pending_close_db, locking_ref_count_key_fsp(fsp, &tmp),
 446                 &lock_ref_count, -1);
 447
 448         SMB_ASSERT(NT_STATUS_IS_OK(status));
 449         SMB_ASSERT(lock_ref_count >= 0);
 450
 451         DEBUG(10,("reduce_windows_lock_ref_count for file now %s = %d\n",
 452                   fsp_str_dbg(fsp), (int)lock_ref_count));
 453 }
 454
 455 /****************************************************************************
 456  Fetch the lock ref count.
 457 ****************************************************************************/
 458
 459 static int32_t get_windows_lock_ref_count(files_struct *fsp)
 460 {
 461         struct lock_ref_count_key tmp;
 462         NTSTATUS status;
 463         int32_t lock_ref_count = 0;
 464
 465         status = dbwrap_fetch_int32(
 466                 posix_pending_close_db, locking_ref_count_key_fsp(fsp, &tmp),
 467                 &lock_ref_count);
 468
 469         if (!NT_STATUS_IS_OK(status) &&
 470             !NT_STATUS_EQUAL(status, NT_STATUS_NOT_FOUND)) {
 471                 DEBUG(0, ("get_windows_lock_ref_count: Error fetching "
 472                           "lock ref count for file %s: %s\n",
 473                           fsp_str_dbg(fsp), nt_errstr(status)));
 474         }
 475         return lock_ref_count;
 476 }
 477
 478 /****************************************************************************
 479  Delete a lock_ref_count entry.
 480 ****************************************************************************/
 481
 482 static void delete_windows_lock_ref_count(files_struct *fsp)
 483 {
 484         struct lock_ref_count_key tmp;
 485
 486         /* Not a bug if it doesn't exist - no locks were ever granted. */
 487
 488         dbwrap_delete(posix_pending_close_db,
 489                       locking_ref_count_key_fsp(fsp, &tmp));
 490
 491         DEBUG(10,("delete_windows_lock_ref_count for file %s\n",
 492                   fsp_str_dbg(fsp)));
 493 }
 494
 495 /****************************************************************************
 496  Add an fd to the pending close tdb.
 497 ****************************************************************************/
 498
 499 static void add_fd_to_close_entry(files_struct *fsp)
 500 {
 501         struct db_record *rec;
 502         int *fds;
 503         size_t num_fds;
 504         NTSTATUS status;
 505         TDB_DATA value;
 506
 507         rec = dbwrap_fetch_locked(
 508                 posix_pending_close_db, talloc_tos(),
 509                 fd_array_key_fsp(fsp));
 510
 511         SMB_ASSERT(rec != NULL);
 512
 513         value = dbwrap_record_get_value(rec);
 514         SMB_ASSERT((value.dsize % sizeof(int)) == 0);
 515
 516         num_fds = value.dsize / sizeof(int);
 517         fds = talloc_array(rec, int, num_fds+1);
 518
 519         SMB_ASSERT(fds != NULL);
 520
 521         memcpy(fds, value.dptr, value.dsize);
 522         fds[num_fds] = fsp->fh->fd;
 523
 524         status = dbwrap_record_store(
 525                 rec, make_tdb_data((uint8_t *)fds, talloc_get_size(fds)), 0);
 526
 527         SMB_ASSERT(NT_STATUS_IS_OK(status));
 528
 529         TALLOC_FREE(rec);
 530
 531         DEBUG(10,("add_fd_to_close_entry: added fd %d file %s\n",
 532                   fsp->fh->fd, fsp_str_dbg(fsp)));
 533 }
 534
 535 /****************************************************************************
 536  Remove all fd entries for a specific dev/inode pair from the tdb.
 537 ****************************************************************************/
 538
 539 static void delete_close_entries(files_struct *fsp)
 540 {
 541         struct db_record *rec;
 542
 543         rec = dbwrap_fetch_locked(
 544                 posix_pending_close_db, talloc_tos(),
 545                 fd_array_key_fsp(fsp));
 546
 547         SMB_ASSERT(rec != NULL);
 548         dbwrap_record_delete(rec);
 549         TALLOC_FREE(rec);
 550 }
 551
 552 /****************************************************************************
 553  Get the array of POSIX pending close records for an open fsp. Returns number
 554  of entries.
 555 ****************************************************************************/
 556
 557 static size_t get_posix_pending_close_entries(TALLOC_CTX *mem_ctx,
 558                                               files_struct *fsp, int **entries)
 559 {
 560         TDB_DATA dbuf;
 561         NTSTATUS status;
 562
 563         status = dbwrap_fetch(
 564                 posix_pending_close_db, mem_ctx, fd_array_key_fsp(fsp),
 565                 &dbuf);
 566
 567         if (NT_STATUS_EQUAL(status, NT_STATUS_NOT_FOUND)) {
 568                 *entries = NULL;
 569                 return 0;
 570         }
 571
 572         SMB_ASSERT(NT_STATUS_IS_OK(status));
 573
 574         if (dbuf.dsize == 0) {
 575                 *entries = NULL;
 576                 return 0;
 577         }
 578
 579         *entries = (int *)dbuf.dptr;
 580         return (size_t)(dbuf.dsize / sizeof(int));
 581 }
 582
 583 /****************************************************************************
 584  Deal with pending closes needed by POSIX locking support.
 585  Note that posix_locking_close_file() is expected to have been called
 586  to delete all locks on this fsp before this function is called.
 587 ****************************************************************************/
 588
 589 int fd_close_posix(struct files_struct *fsp)
 590 {
 591         int saved_errno = 0;
 592         int ret;
 593         int *fd_array = NULL;
 594         size_t count, i;
 595
 596         if (!lp_locking(fsp->conn->params) ||
 597             !lp_posix_locking(fsp->conn->params))
 598         {
 599                 /*
 600                  * No locking or POSIX to worry about or we want POSIX semantics
 601                  * which will lose all locks on all fd's open on this dev/inode,
 602                  * just close.
 603                  */
 604                 return close(fsp->fh->fd);
 605         }
 606
 607         if (get_windows_lock_ref_count(fsp)) {
 608
 609                 /*
 610                  * There are outstanding locks on this dev/inode pair on
 611                  * other fds. Add our fd to the pending close tdb and set
 612                  * fsp->fh->fd to -1.
 613                  */
 614
 615                 add_fd_to_close_entry(fsp);
 616                 return 0;
 617         }
 618
 619         /*
 620          * No outstanding locks. Get the pending close fd's
 621          * from the tdb and close them all.
 622          */
 623
 624         count = get_posix_pending_close_entries(talloc_tos(), fsp, &fd_array);
 625
 626         if (count) {
 627                 DEBUG(10,("fd_close_posix: doing close on %u fd's.\n",
 628                           (unsigned int)count));
 629
 630                 for(i = 0; i < count; i++) {
 631                         if (close(fd_array[i]) == -1) {
 632                                 saved_errno = errno;
 633                         }
 634                 }
 635
 636                 /*
 637                  * Delete all fd's stored in the tdb
 638                  * for this dev/inode pair.
 639                  */
 640
 641                 delete_close_entries(fsp);
 642         }
 643
 644         TALLOC_FREE(fd_array);
 645
 646         /* Don't need a lock ref count on this dev/ino anymore. */
 647         delete_windows_lock_ref_count(fsp);
 648
 649         /*
 650          * Finally close the fd associated with this fsp.
 651          */
 652
 653         ret = close(fsp->fh->fd);
 654
 655         if (ret == 0 && saved_errno != 0) {
 656                 errno = saved_errno;
 657                 ret = -1;
 658         }
 659
 660         return ret;
 661 }
 662
 663 /****************************************************************************
 664  Next - the functions that deal with the mapping CIFS Windows locks onto
 665  the underlying system POSIX locks.
 666 ****************************************************************************/
 667
 668 /*
 669  * Structure used when splitting a lock range
 670  * into a POSIX lock range. Doubly linked list.
 671  */
 672
 673 struct lock_list {
 674         struct lock_list *next;
 675         struct lock_list *prev;
 676         off_t start;
 677         off_t size;
 678 };
 679
 680 /****************************************************************************
 681  Create a list of lock ranges that don't overlap a given range. Used in calculating
 682  POSIX locks and unlocks. This is a difficult function that requires ASCII art to
 683  understand it :-).
 684 ****************************************************************************/
 685
 686 static struct lock_list *posix_lock_list(TALLOC_CTX *ctx,
 687                                                 struct lock_list *lhead,
 688                                                 const struct lock_context *lock_ctx, /* Lock context lhead belongs to. */
 689                                                 files_struct *fsp,
 690                                                 const struct lock_struct *plocks,
 691                                                 int num_locks)
 692 {
 693         int i;
 694
 695         /*
 696          * Check the current lock list on this dev/inode pair.
 697          * Quit if the list is deleted.
 698          */
 699
 700         DEBUG(10, ("posix_lock_list: curr: start=%ju,size=%ju\n",
 701                    (uintmax_t)lhead->start, (uintmax_t)lhead->size ));
 702
 703         for (i=0; i<num_locks && lhead; i++) {
 704                 const struct lock_struct *lock = &plocks[i];
 705                 struct lock_list *l_curr;
 706
 707                 /* Ignore all but read/write locks. */
 708                 if (lock->lock_type != READ_LOCK && lock->lock_type != WRITE_LOCK) {
 709                         continue;
 710                 }
 711
 712                 /* Ignore locks not owned by this process. */
 713                 if (!serverid_equal(&lock->context.pid, &lock_ctx->pid)) {
 714                         continue;
 715                 }
 716
 717                 /*
 718                  * Walk the lock list, checking for overlaps. Note that
 719                  * the lock list can expand within this loop if the current
 720                  * range being examined needs to be split.
 721                  */
 722
 723                 for (l_curr = lhead; l_curr;) {
 724
 725                         DEBUG(10, ("posix_lock_list: lock: fnum=%ju: "
 726                                    "start=%ju,size=%ju:type=%s",
 727                                    (uintmax_t)lock->fnum,
 728                                    (uintmax_t)lock->start,
 729                                    (uintmax_t)lock->size,
 730                                    posix_lock_type_name(lock->lock_type) ));
 731
 732                         if ( (l_curr->start >= (lock->start + lock->size)) ||
 733                                  (lock->start >= (l_curr->start + l_curr->size))) {
 734
 735                                 /* No overlap with existing lock - leave this range alone. */
 736 /*********************************************
 737                                              +---------+
 738                                              | l_curr  |
 739                                              +---------+
 740                                 +-------+
 741                                 | lock  |
 742                                 +-------+
 743 OR....
 744              +---------+
 745              |  l_curr |
 746              +---------+
 747 **********************************************/
 748
 749                                 DEBUG(10,(" no overlap case.\n" ));
 750
 751                                 l_curr = l_curr->next;
 752
 753                         } else if ( (l_curr->start >= lock->start) &&
 754                                                 (l_curr->start + l_curr->size <= lock->start + lock->size) ) {
 755
 756                                 /*
 757                                  * This range is completely overlapped by this existing lock range
 758                                  * and thus should have no effect. Delete it from the list.
 759                                  */
 760 /*********************************************
 761                 +---------+
 762                 |  l_curr |
 763                 +---------+
 764         +---------------------------+
 765         |       lock                |
 766         +---------------------------+
 767 **********************************************/
 768                                 /* Save the next pointer */
 769                                 struct lock_list *ul_next = l_curr->next;
 770
 771                                 DEBUG(10,(" delete case.\n" ));
 772
 773                                 DLIST_REMOVE(lhead, l_curr);
 774                                 if(lhead == NULL) {
 775                                         break; /* No more list... */
 776                                 }
 777
 778                                 l_curr = ul_next;
 779
 780                         } else if ( (l_curr->start >= lock->start) &&
 781                                                 (l_curr->start < lock->start + lock->size) &&
 782                                                 (l_curr->start + l_curr->size > lock->start + lock->size) ) {
 783
 784                                 /*
 785                                  * This range overlaps the existing lock range at the high end.
 786                                  * Truncate by moving start to existing range end and reducing size.
 787                                  */
 788 /*********************************************
 789                 +---------------+
 790                 |  l_curr       |
 791                 +---------------+
 792         +---------------+
 793         |    lock       |
 794         +---------------+
 795 BECOMES....
 796                         +-------+
 797                         | l_curr|
 798                         +-------+
 799 **********************************************/
 800
 801                                 l_curr->size = (l_curr->start + l_curr->size) - (lock->start + lock->size);
 802                                 l_curr->start = lock->start + lock->size;
 803
 804                                 DEBUG(10, (" truncate high case: start=%ju,"
 805                                            "size=%ju\n",
 806                                            (uintmax_t)l_curr->start,
 807                                            (uintmax_t)l_curr->size ));
 808
 809                                 l_curr = l_curr->next;
 810
 811                         } else if ( (l_curr->start < lock->start) &&
 812                                                 (l_curr->start + l_curr->size > lock->start) &&
 813                                                 (l_curr->start + l_curr->size <= lock->start + lock->size) ) {
 814
 815                                 /*
 816                                  * This range overlaps the existing lock range at the low end.
 817                                  * Truncate by reducing size.
 818                                  */
 819 /*********************************************
 820    +---------------+
 821    |  l_curr       |
 822    +---------------+
 823            +---------------+
 824            |    lock       |
 825            +---------------+
 826 BECOMES....
 827    +-------+
 828    | l_curr|
 829    +-------+
 830 **********************************************/
 831
 832                                 l_curr->size = lock->start - l_curr->start;
 833
 834                                 DEBUG(10, (" truncate low case: start=%ju,"
 835                                            "size=%ju\n",
 836                                            (uintmax_t)l_curr->start,
 837                                            (uintmax_t)l_curr->size ));
 838
 839                                 l_curr = l_curr->next;
 840
 841                         } else if ( (l_curr->start < lock->start) &&
 842                                                 (l_curr->start + l_curr->size > lock->start + lock->size) ) {
 843                                 /*
 844                                  * Worst case scenario. Range completely overlaps an existing
 845                                  * lock range. Split the request into two, push the new (upper) request
 846                                  * into the dlink list, and continue with the entry after l_new (as we
 847                                  * know that l_new will not overlap with this lock).
 848                                  */
 849 /*********************************************
 850         +---------------------------+
 851         |        l_curr             |
 852         +---------------------------+
 853                 +---------+
 854                 | lock    |
 855                 +---------+
 856 BECOMES.....
 857         +-------+         +---------+
 858         | l_curr|         | l_new   |
 859         +-------+         +---------+
 860 **********************************************/
 861                                 struct lock_list *l_new = talloc(ctx, struct lock_list);
 862
 863                                 if(l_new == NULL) {
 864                                         DEBUG(0,("posix_lock_list: talloc fail.\n"));
 865                                         return NULL; /* The talloc_destroy takes care of cleanup. */
 866                                 }
 867
 868                                 ZERO_STRUCTP(l_new);
 869                                 l_new->start = lock->start + lock->size;
 870                                 l_new->size = l_curr->start + l_curr->size - l_new->start;
 871
 872                                 /* Truncate the l_curr. */
 873                                 l_curr->size = lock->start - l_curr->start;
 874
 875                                 DEBUG(10, (" split case: curr: start=%ju,"
 876                                            "size=%ju new: start=%ju,"
 877                                            "size=%ju\n",
 878                                            (uintmax_t)l_curr->start,
 879                                            (uintmax_t)l_curr->size,
 880                                            (uintmax_t)l_new->start,
 881                                            (uintmax_t)l_new->size ));
 882
 883                                 /*
 884                                  * Add into the dlink list after the l_curr point - NOT at lhead.
 885                                  */
 886                                 DLIST_ADD_AFTER(lhead, l_new, l_curr);
 887
 888                                 /* And move after the link we added. */
 889                                 l_curr = l_new->next;
 890
 891                         } else {
 892
 893                                 /*
 894                                  * This logic case should never happen. Ensure this is the
 895                                  * case by forcing an abort.... Remove in production.
 896                                  */
 897                                 char *msg = NULL;
 898
 899                                 if (asprintf(&msg, "logic flaw in cases: "
 900                                              "l_curr: start = %ju, "
 901                                              "size = %ju : lock: "
 902                                              "start = %ju, size = %ju",
 903                                              (uintmax_t)l_curr->start,
 904                                              (uintmax_t)l_curr->size,
 905                                              (uintmax_t)lock->start,
 906                                              (uintmax_t)lock->size ) != -1) {
 907                                         smb_panic(msg);
 908                                 } else {
 909                                         smb_panic("posix_lock_list");
 910                                 }
 911                         }
 912                 } /* end for ( l_curr = lhead; l_curr;) */
 913         } /* end for (i=0; i<num_locks && ul_head; i++) */
 914
 915         return lhead;
 916 }
 917
 918 /****************************************************************************
 919  POSIX function to acquire a lock. Returns True if the
 920  lock could be granted, False if not.
 921 ****************************************************************************/
 922
 923 bool set_posix_lock_windows_flavour(files_struct *fsp,
 924                         uint64_t u_offset,
 925                         uint64_t u_count,
 926                         enum brl_type lock_type,
 927                         const struct lock_context *lock_ctx,
 928                         const struct lock_struct *plocks,
 929                         int num_locks,
 930                         int *errno_ret)
 931 {
 932         off_t offset;
 933         off_t count;
 934         int posix_lock_type = map_posix_lock_type(fsp,lock_type);
 935         bool ret = True;
 936         size_t lock_count;
 937         TALLOC_CTX *l_ctx = NULL;
 938         struct lock_list *llist = NULL;
 939         struct lock_list *ll = NULL;
 940
 941         DEBUG(5, ("set_posix_lock_windows_flavour: File %s, offset = %ju, "
 942                   "count = %ju, type = %s\n", fsp_str_dbg(fsp),
 943                   (uintmax_t)u_offset, (uintmax_t)u_count,
 944                   posix_lock_type_name(lock_type)));
 945
 946         /*
 947          * If the requested lock won't fit in the POSIX range, we will
 948          * pretend it was successful.
 949          */
 950
 951         if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
 952                 increment_windows_lock_ref_count(fsp);
 953                 return True;
 954         }
 955
 956         /*
 957          * Windows is very strange. It allows read locks to be overlayed
 958          * (even over a write lock), but leaves the write lock in force until the first
 959          * unlock. It also reference counts the locks. This means the following sequence :
 960          *
 961          * process1                                      process2
 962          * ------------------------------------------------------------------------
 963          * WRITE LOCK : start = 2, len = 10
 964          *                                            READ LOCK: start =0, len = 10 - FAIL
 965          * READ LOCK : start = 0, len = 14
 966          *                                            READ LOCK: start =0, len = 10 - FAIL
 967          * UNLOCK : start = 2, len = 10
 968          *                                            READ LOCK: start =0, len = 10 - OK
 969          *
 970          * Under POSIX, the same sequence in steps 1 and 2 would not be reference counted, but
 971          * would leave a single read lock over the 0-14 region.
 972          */
 973
 974         if ((l_ctx = talloc_init("set_posix_lock")) == NULL) {
 975                 DEBUG(0,("set_posix_lock_windows_flavour: unable to init talloc context.\n"));
 976                 return False;
 977         }
 978
 979         if ((ll = talloc(l_ctx, struct lock_list)) == NULL) {
 980                 DEBUG(0,("set_posix_lock_windows_flavour: unable to talloc unlock list.\n"));
 981                 talloc_destroy(l_ctx);
 982                 return False;
 983         }
 984
 985         /*
 986          * Create the initial list entry containing the
 987          * lock we want to add.
 988          */
 989
 990         ZERO_STRUCTP(ll);
 991         ll->start = offset;
 992         ll->size = count;
 993
 994         DLIST_ADD(llist, ll);
 995
 996         /*
 997          * The following call calculates if there are any
 998          * overlapping locks held by this process on
 999          * fd's open on the same file and splits this list
1000          * into a list of lock ranges that do not overlap with existing
1001          * POSIX locks.
1002          */
1003
1004         llist = posix_lock_list(l_ctx,
1005                                 llist,
1006                                 lock_ctx, /* Lock context llist belongs to. */
1007                                 fsp,
1008                                 plocks,
1009                                 num_locks);
1010
1011         /*
1012          * Add the POSIX locks on the list of ranges returned.
1013          * As the lock is supposed to be added atomically, we need to
1014          * back out all the locks if any one of these calls fail.
1015          */
1016
1017         for (lock_count = 0, ll = llist; ll; ll = ll->next, lock_count++) {
1018                 offset = ll->start;
1019                 count = ll->size;
1020
1021                 DEBUG(5, ("set_posix_lock_windows_flavour: Real lock: "
1022                           "Type = %s: offset = %ju, count = %ju\n",
1023                           posix_lock_type_name(posix_lock_type),
1024                           (uintmax_t)offset, (uintmax_t)count ));
1025
1026                 if (!posix_fcntl_lock(fsp,F_SETLK,offset,count,posix_lock_type)) {
1027                         *errno_ret = errno;
1028                         DEBUG(5, ("set_posix_lock_windows_flavour: Lock "
1029                                   "fail !: Type = %s: offset = %ju, "
1030                                   "count = %ju. Errno = %s\n",
1031                                   posix_lock_type_name(posix_lock_type),
1032                                   (uintmax_t)offset, (uintmax_t)count,
1033                                   strerror(errno) ));
1034                         ret = False;
1035                         break;
1036                 }
1037         }
1038
1039         if (!ret) {
1040
1041                 /*
1042                  * Back out all the POSIX locks we have on fail.
1043                  */
1044
1045                 for (ll = llist; lock_count; ll = ll->next, lock_count--) {
1046                         offset = ll->start;
1047                         count = ll->size;
1048
1049                         DEBUG(5, ("set_posix_lock_windows_flavour: Backing "
1050                                   "out locks: Type = %s: offset = %ju, "
1051                                   "count = %ju\n",
1052                                   posix_lock_type_name(posix_lock_type),
1053                                   (uintmax_t)offset, (uintmax_t)count ));
1054
1055                         posix_fcntl_lock(fsp,F_SETLK,offset,count,F_UNLCK);
1056                 }
1057         } else {
1058                 /* Remember the number of Windows locks we have on this dev/ino pair. */
1059                 increment_windows_lock_ref_count(fsp);
1060         }
1061
1062         talloc_destroy(l_ctx);
1063         return ret;
1064 }
1065
1066 /****************************************************************************
1067  POSIX function to release a lock. Returns True if the
1068  lock could be released, False if not.
1069 ****************************************************************************/
1070
1071 bool release_posix_lock_windows_flavour(files_struct *fsp,
1072                                 uint64_t u_offset,
1073                                 uint64_t u_count,
1074                                 enum brl_type deleted_lock_type,
1075                                 const struct lock_context *lock_ctx,
1076                                 const struct lock_struct *plocks,
1077                                 int num_locks)
1078 {
1079         off_t offset;
1080         off_t count;
1081         bool ret = True;
1082         TALLOC_CTX *ul_ctx = NULL;
1083         struct lock_list *ulist = NULL;
1084         struct lock_list *ul = NULL;
1085
1086         DEBUG(5, ("release_posix_lock_windows_flavour: File %s, offset = %ju, "
1087                   "count = %ju\n", fsp_str_dbg(fsp),
1088                   (uintmax_t)u_offset, (uintmax_t)u_count));
1089
1090         /* Remember the number of Windows locks we have on this dev/ino pair. */
1091         decrement_windows_lock_ref_count(fsp);
1092
1093         /*
1094          * If the requested lock won't fit in the POSIX range, we will
1095          * pretend it was successful.
1096          */
1097
1098         if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
1099                 return True;
1100         }
1101
1102         if ((ul_ctx = talloc_init("release_posix_lock")) == NULL) {
1103                 DEBUG(0,("release_posix_lock_windows_flavour: unable to init talloc context.\n"));
1104                 return False;
1105         }
1106
1107         if ((ul = talloc(ul_ctx, struct lock_list)) == NULL) {
1108                 DEBUG(0,("release_posix_lock_windows_flavour: unable to talloc unlock list.\n"));
1109                 talloc_destroy(ul_ctx);
1110                 return False;
1111         }
1112
1113         /*
1114          * Create the initial list entry containing the
1115          * lock we want to remove.
1116          */
1117
1118         ZERO_STRUCTP(ul);
1119         ul->start = offset;
1120         ul->size = count;
1121
1122         DLIST_ADD(ulist, ul);
1123
1124         /*
1125          * The following call calculates if there are any
1126          * overlapping locks held by this process on
1127          * fd's open on the same file and creates a
1128          * list of unlock ranges that will allow
1129          * POSIX lock ranges to remain on the file whilst the
1130          * unlocks are performed.
1131          */
1132
1133         ulist = posix_lock_list(ul_ctx,
1134                                 ulist,
1135                                 lock_ctx, /* Lock context ulist belongs to. */
1136                                 fsp,
1137                                 plocks,
1138                                 num_locks);
1139
1140         /*
1141          * If there were any overlapped entries (list is > 1 or size or start have changed),
1142          * and the lock_type we just deleted from
1143          * the upper layer tdb was a write lock, then before doing the unlock we need to downgrade
1144          * the POSIX lock to a read lock. This allows any overlapping read locks
1145          * to be atomically maintained.
1146          */
1147
1148         if (deleted_lock_type == WRITE_LOCK &&
1149                         (!ulist || ulist->next != NULL || ulist->start != offset || ulist->size != count)) {
1150
1151                 DEBUG(5, ("release_posix_lock_windows_flavour: downgrading "
1152                           "lock to READ: offset = %ju, count = %ju\n",
1153                           (uintmax_t)offset, (uintmax_t)count ));
1154
1155                 if (!posix_fcntl_lock(fsp,F_SETLK,offset,count,F_RDLCK)) {
1156                         DEBUG(0,("release_posix_lock_windows_flavour: downgrade of lock failed with error %s !\n", strerror(errno) ));
1157                         talloc_destroy(ul_ctx);
1158                         return False;
1159                 }
1160         }
1161
1162         /*
1163          * Release the POSIX locks on the list of ranges returned.
1164          */
1165
1166         for(; ulist; ulist = ulist->next) {
1167                 offset = ulist->start;
1168                 count = ulist->size;
1169
1170                 DEBUG(5, ("release_posix_lock_windows_flavour: Real unlock: "
1171                           "offset = %ju, count = %ju\n",
1172                           (uintmax_t)offset, (uintmax_t)count ));
1173
1174                 if (!posix_fcntl_lock(fsp,F_SETLK,offset,count,F_UNLCK)) {
1175                         ret = False;
1176                 }
1177         }
1178
1179         talloc_destroy(ul_ctx);
1180         return ret;
1181 }
1182
1183 /****************************************************************************
1184  Next - the functions that deal with mapping CIFS POSIX locks onto
1185  the underlying system POSIX locks.
1186 ****************************************************************************/
1187
1188 /****************************************************************************
1189  POSIX function to acquire a lock. Returns True if the
1190  lock could be granted, False if not.
1191  As POSIX locks don't stack or conflict (they just overwrite)
1192  we can map the requested lock directly onto a system one. We
1193  know it doesn't conflict with locks on other contexts as the
1194  upper layer would have refused it.
1195 ****************************************************************************/
1196
1197 bool set_posix_lock_posix_flavour(files_struct *fsp,
1198                         uint64_t u_offset,
1199                         uint64_t u_count,
1200                         enum brl_type lock_type,
1201                         int *errno_ret)
1202 {
1203         off_t offset;
1204         off_t count;
1205         int posix_lock_type = map_posix_lock_type(fsp,lock_type);
1206
1207         DEBUG(5,("set_posix_lock_posix_flavour: File %s, offset = %ju, count "
1208                  "= %ju, type = %s\n", fsp_str_dbg(fsp),
1209                  (uintmax_t)u_offset, (uintmax_t)u_count,
1210                  posix_lock_type_name(lock_type)));
1211
1212         /*
1213          * If the requested lock won't fit in the POSIX range, we will
1214          * pretend it was successful.
1215          */
1216
1217         if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
1218                 return True;
1219         }
1220
1221         if (!posix_fcntl_lock(fsp,F_SETLK,offset,count,posix_lock_type)) {
1222                 *errno_ret = errno;
1223                 DEBUG(5,("set_posix_lock_posix_flavour: Lock fail !: Type = %s: offset = %ju, count = %ju. Errno = %s\n",
1224                         posix_lock_type_name(posix_lock_type), (intmax_t)offset, (intmax_t)count, strerror(errno) ));
1225                 return False;
1226         }
1227         return True;
1228 }
1229
1230 /****************************************************************************
1231  POSIX function to release a lock. Returns True if the
1232  lock could be released, False if not.
1233  We are given a complete lock state from the upper layer which is what the lock
1234  state should be after the unlock has already been done, so what
1235  we do is punch out holes in the unlock range where locks owned by this process
1236  have a different lock context.
1237 ****************************************************************************/
1238
1239 bool release_posix_lock_posix_flavour(files_struct *fsp,
1240                                 uint64_t u_offset,
1241                                 uint64_t u_count,
1242                                 const struct lock_context *lock_ctx,
1243                                 const struct lock_struct *plocks,
1244                                 int num_locks)
1245 {
1246         bool ret = True;
1247         off_t offset;
1248         off_t count;
1249         TALLOC_CTX *ul_ctx = NULL;
1250         struct lock_list *ulist = NULL;
1251         struct lock_list *ul = NULL;
1252
1253         DEBUG(5, ("release_posix_lock_posix_flavour: File %s, offset = %ju, "
1254                   "count = %ju\n", fsp_str_dbg(fsp),
1255                   (uintmax_t)u_offset, (uintmax_t)u_count));
1256
1257         /*
1258          * If the requested lock won't fit in the POSIX range, we will
1259          * pretend it was successful.
1260          */
1261
1262         if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
1263                 return True;
1264         }
1265
1266         if ((ul_ctx = talloc_init("release_posix_lock")) == NULL) {
1267                 DEBUG(0,("release_posix_lock_windows_flavour: unable to init talloc context.\n"));
1268                 return False;
1269         }
1270
1271         if ((ul = talloc(ul_ctx, struct lock_list)) == NULL) {
1272                 DEBUG(0,("release_posix_lock_windows_flavour: unable to talloc unlock list.\n"));
1273                 talloc_destroy(ul_ctx);
1274                 return False;
1275         }
1276
1277         /*
1278          * Create the initial list entry containing the
1279          * lock we want to remove.
1280          */
1281
1282         ZERO_STRUCTP(ul);
1283         ul->start = offset;
1284         ul->size = count;
1285
1286         DLIST_ADD(ulist, ul);
1287
1288         /*
1289          * Walk the given array creating a linked list
1290          * of unlock requests.
1291          */
1292
1293         ulist = posix_lock_list(ul_ctx,
1294                                 ulist,
1295                                 lock_ctx, /* Lock context ulist belongs to. */
1296                                 fsp,
1297                                 plocks,
1298                                 num_locks);
1299
1300         /*
1301          * Release the POSIX locks on the list of ranges returned.
1302          */
1303
1304         for(; ulist; ulist = ulist->next) {
1305                 offset = ulist->start;
1306                 count = ulist->size;
1307
1308                 DEBUG(5, ("release_posix_lock_posix_flavour: Real unlock: "
1309                           "offset = %ju, count = %ju\n",
1310                           (uintmax_t)offset, (uintmax_t)count ));
1311
1312                 if (!posix_fcntl_lock(fsp,F_SETLK,offset,count,F_UNLCK)) {
1313                         ret = False;
1314                 }
1315         }
1316
1317         talloc_destroy(ul_ctx);
1318         return ret;
1319 }