[Samba.git] / source3 / locking / posix.c
/*
   Unix SMB/CIFS implementation.
   Locking functions
   Copyright (C) Jeremy Allison 1992-2006

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.

   Revision History:

   POSIX locking support. Jeremy Allison (jeremy@valinux.com), Apr. 2000.
*/
#include "includes.h"
#include "system/filesys.h"
#include "locking/proto.h"
#include "dbwrap/dbwrap.h"
#include "dbwrap/dbwrap_rbt.h"
#include "util_tdb.h"

#undef DBGC_CLASS
#define DBGC_CLASS DBGC_LOCKING

/*
 * The pending close database handle.
 */

static struct db_context *posix_pending_close_db;

/****************************************************************************
 First - the functions that deal with the underlying system locks - these
 functions are used no matter if we're mapping CIFS Windows locks or CIFS
 POSIX locks onto POSIX.
****************************************************************************/

/****************************************************************************
 Utility function to map a lock type correctly depending on the open
 mode of a file.
****************************************************************************/

static int map_posix_lock_type( files_struct *fsp, enum brl_type lock_type)
{
	if((lock_type == WRITE_LOCK) && !fsp->can_write) {
		/*
		 * Many UNIX's cannot get a write lock on a file opened read-only.
		 * Win32 locking semantics allow this.
		 * Do the best we can and attempt a read-only lock.
		 */
		DEBUG(10,("map_posix_lock_type: Downgrading write lock to read due to read-only file.\n"));
		return F_RDLCK;
	}

	/*
	 * This return should be the most normal, as we attempt
	 * to always open files read/write.
	 */

	return (lock_type == READ_LOCK) ? F_RDLCK : F_WRLCK;
}
/****************************************************************************
 Debugging aid :-).
****************************************************************************/

static const char *posix_lock_type_name(int lock_type)
{
	return (lock_type == F_RDLCK) ? "READ" : "WRITE";
}

/****************************************************************************
 Check to see if the given unsigned lock range is within the possible POSIX
 range. Modifies the given args to be in range if possible, just returns
 False if not.
****************************************************************************/

static bool posix_lock_in_range(off_t *offset_out, off_t *count_out,
				uint64_t u_offset, uint64_t u_count)
{
	off_t offset = (off_t)u_offset;
	off_t count = (off_t)u_count;

	/*
	 * For the type of system we are, attempt to
	 * find the maximum positive lock offset as an off_t.
	 */

#if defined(MAX_POSITIVE_LOCK_OFFSET) /* Some systems have arbitrary limits. */

	off_t max_positive_lock_offset = (MAX_POSITIVE_LOCK_OFFSET);

#else
	/*
	 * In this case off_t is 64 bits,
	 * and the underlying system can handle 64 bit signed locks.
	 */

	off_t mask2 = ((off_t)0x4) << (SMB_OFF_T_BITS-4);
	off_t mask = (mask2<<1);
	off_t max_positive_lock_offset = ~mask;

#endif
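	/*
	 * Worked example (assuming the #else branch above, i.e. no
	 * MAX_POSITIVE_LOCK_OFFSET override and SMB_OFF_T_BITS == 64):
	 *
	 *	mask2 = 0x4 << 60  = 0x4000000000000000
	 *	mask  = mask2 << 1 = 0x8000000000000000  (just the sign bit)
	 *	max_positive_lock_offset = ~mask = 0x7FFFFFFFFFFFFFFF
	 *
	 * i.e. the largest positive value a signed 64-bit off_t can hold.
	 */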
	/*
	 * POSIX locks of length zero mean lock to end-of-file.
	 * Win32 locks of length zero are point probes. Ignore
	 * any Win32 locks of length zero. JRA.
	 */

	if (count == (off_t)0) {
		DEBUG(10,("posix_lock_in_range: count = 0, ignoring.\n"));
		return False;
	}

	/*
	 * If the given offset was > max_positive_lock_offset then we cannot
	 * map this at all - just ignore this lock.
	 */

	if (u_offset & ~((uint64_t)max_positive_lock_offset)) {
		DEBUG(10,("posix_lock_in_range: (offset = %.0f) offset > %.0f and we cannot handle this. Ignoring lock.\n",
			(double)u_offset, (double)((uint64_t)max_positive_lock_offset) ));
		return False;
	}

	/*
	 * We must truncate the count to less than max_positive_lock_offset.
	 */

	if (u_count & ~((uint64_t)max_positive_lock_offset)) {
		count = max_positive_lock_offset;
	}

	/*
	 * Truncate count to end at max lock offset.
	 */

	if (offset + count < 0 || offset + count > max_positive_lock_offset) {
		count = max_positive_lock_offset - offset;
	}

	/*
	 * If we ate all the count, ignore this lock.
	 */

	if (count == 0) {
		DEBUG(10,("posix_lock_in_range: Count = 0. Ignoring lock u_offset = %.0f, u_count = %.0f\n",
			(double)u_offset, (double)u_count ));
		return False;
	}

	/*
	 * The mapping was successful.
	 */

	DEBUG(10,("posix_lock_in_range: offset_out = %.0f, count_out = %.0f\n",
		(double)offset, (double)count ));

	*offset_out = offset;
	*count_out = count;

	return True;
}
bool smb_vfs_call_lock(struct vfs_handle_struct *handle,
		       struct files_struct *fsp, int op, off_t offset,
		       off_t count, int type)
{
	VFS_FIND(lock);
	return handle->fns->lock_fn(handle, fsp, op, offset, count, type);
}

/****************************************************************************
 Actual function that does POSIX locks. Copes with 64 -> 32 bit cruft and
 broken NFS implementations.
****************************************************************************/

static bool posix_fcntl_lock(files_struct *fsp, int op, off_t offset, off_t count, int type)
{
	bool ret;

	DEBUG(8,("posix_fcntl_lock %d %d %jd %jd %d\n",
		 fsp->fh->fd,op,(intmax_t)offset,(intmax_t)count,type));

	ret = SMB_VFS_LOCK(fsp, op, offset, count, type);

	if (!ret && ((errno == EFBIG) || (errno == ENOLCK) || (errno == EINVAL))) {

		DEBUG(0,("posix_fcntl_lock: WARNING: lock request at offset %.0f, length %.0f returned\n",
			(double)offset,(double)count));
		DEBUGADD(0,("an %s error. This can happen when using 64 bit lock offsets\n", strerror(errno)));
		DEBUGADD(0,("on 32 bit NFS mounted file systems.\n"));

		/*
		 * If the offset is > 0x7FFFFFFF then this will cause problems on
		 * 32 bit NFS mounted filesystems. Just ignore it.
		 */

		if (offset & ~((off_t)0x7fffffff)) {
			DEBUG(0,("Offset greater than 31 bits. Returning success.\n"));
			return True;
		}

		if (count & ~((off_t)0x7fffffff)) {
			/* 32 bit NFS file system, retry with smaller offset */
			DEBUG(0,("Count greater than 31 bits - retrying with 31 bit truncated length.\n"));
			errno = 0;
			count &= 0x7fffffff;
			ret = SMB_VFS_LOCK(fsp, op, offset, count, type);
		}
	}

	DEBUG(8,("posix_fcntl_lock: Lock call %s\n", ret ? "successful" : "failed"));
	return ret;
}
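/*
 * For reference: with the default VFS backend, SMB_VFS_LOCK() normally boils
 * down to a plain fcntl() byte-range lock. A minimal sketch of that mapping
 * (an assumption about the common case, not a copy of the vfs_default code)
 * looks like:
 *
 *	struct flock fl;
 *	ZERO_STRUCT(fl);
 *	fl.l_type = type;		// F_RDLCK, F_WRLCK or F_UNLCK
 *	fl.l_whence = SEEK_SET;
 *	fl.l_start = offset;
 *	fl.l_len = count;		// 0 means "lock to end of file"
 *	ret = (fcntl(fsp->fh->fd, op, &fl) == 0);	// op: F_SETLK/F_SETLKW
 */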
bool smb_vfs_call_getlock(struct vfs_handle_struct *handle,
			  struct files_struct *fsp, off_t *poffset,
			  off_t *pcount, int *ptype, pid_t *ppid)
{
	VFS_FIND(getlock);
	return handle->fns->getlock_fn(handle, fsp, poffset, pcount, ptype,
				       ppid);
}

/****************************************************************************
 Actual function that gets POSIX locks. Copes with 64 -> 32 bit cruft and
 broken NFS implementations.
****************************************************************************/

static bool posix_fcntl_getlock(files_struct *fsp, off_t *poffset, off_t *pcount, int *ptype)
{
	pid_t pid;
	bool ret;

	DEBUG(8,("posix_fcntl_getlock %d %.0f %.0f %d\n",
		fsp->fh->fd,(double)*poffset,(double)*pcount,*ptype));

	ret = SMB_VFS_GETLOCK(fsp, poffset, pcount, ptype, &pid);

	if (!ret && ((errno == EFBIG) || (errno == ENOLCK) || (errno == EINVAL))) {

		DEBUG(0,("posix_fcntl_getlock: WARNING: lock request at offset %.0f, length %.0f returned\n",
			(double)*poffset,(double)*pcount));
		DEBUGADD(0,("an %s error. This can happen when using 64 bit lock offsets\n", strerror(errno)));
		DEBUGADD(0,("on 32 bit NFS mounted file systems.\n"));

		/*
		 * If the offset is > 0x7FFFFFFF then this will cause problems on
		 * 32 bit NFS mounted filesystems. Just ignore it.
		 */

		if (*poffset & ~((off_t)0x7fffffff)) {
			DEBUG(0,("Offset greater than 31 bits. Returning success.\n"));
			return True;
		}

		if (*pcount & ~((off_t)0x7fffffff)) {
			/* 32 bit NFS file system, retry with smaller offset */
			DEBUG(0,("Count greater than 31 bits - retrying with 31 bit truncated length.\n"));
			errno = 0;
			*pcount &= 0x7fffffff;
			ret = SMB_VFS_GETLOCK(fsp,poffset,pcount,ptype,&pid);
		}
	}

	DEBUG(8,("posix_fcntl_getlock: Lock query call %s\n", ret ? "successful" : "failed"));
	return ret;
}
/****************************************************************************
 POSIX function to see if a file region is locked. Returns True if the
 region is locked, False otherwise.
****************************************************************************/

bool is_posix_locked(files_struct *fsp,
			uint64_t *pu_offset,
			uint64_t *pu_count,
			enum brl_type *plock_type,
			enum brl_flavour lock_flav)
{
	off_t offset;
	off_t count;
	int posix_lock_type = map_posix_lock_type(fsp,*plock_type);

	DEBUG(10,("is_posix_locked: File %s, offset = %.0f, count = %.0f, "
		  "type = %s\n", fsp_str_dbg(fsp), (double)*pu_offset,
		  (double)*pu_count, posix_lock_type_name(*plock_type)));

	/*
	 * If the requested lock won't fit in the POSIX range, we will
	 * never set it, so presume it is not locked.
	 */

	if(!posix_lock_in_range(&offset, &count, *pu_offset, *pu_count)) {
		return False;
	}

	if (!posix_fcntl_getlock(fsp,&offset,&count,&posix_lock_type)) {
		return False;
	}

	if (posix_lock_type == F_UNLCK) {
		return False;
	}

	if (lock_flav == POSIX_LOCK) {
		/* Only POSIX lock queries need to know the details. */
		*pu_offset = (uint64_t)offset;
		*pu_count = (uint64_t)count;
		*plock_type = (posix_lock_type == F_RDLCK) ? READ_LOCK : WRITE_LOCK;
	}
	return True;
}
/****************************************************************************
 Next - the functions that deal with in memory database storing representations
 of either Windows CIFS locks or POSIX CIFS locks.
****************************************************************************/

/* The key used in the in-memory POSIX databases. */

struct lock_ref_count_key {
	struct file_id id;
	char r;
};

/*******************************************************************
 Form a static locking key for a dev/inode pair for the lock ref count
******************************************************************/

static TDB_DATA locking_ref_count_key_fsp(files_struct *fsp,
					  struct lock_ref_count_key *tmp)
{
	ZERO_STRUCTP(tmp);
	tmp->id = fsp->file_id;
	tmp->r = 'r';
	return make_tdb_data((uint8_t *)tmp, sizeof(*tmp));
}
/*******************************************************************
 Convenience function to get an fd_array key from an fsp.
******************************************************************/

static TDB_DATA fd_array_key_fsp(files_struct *fsp)
{
	return make_tdb_data((uint8_t *)&fsp->file_id, sizeof(fsp->file_id));
}
/*******************************************************************
 Create the in-memory POSIX lock databases.
********************************************************************/

bool posix_locking_init(bool read_only)
{
	if (posix_pending_close_db != NULL) {
		return true;
	}

	posix_pending_close_db = db_open_rbt(NULL);

	if (posix_pending_close_db == NULL) {
		DEBUG(0,("Failed to open POSIX pending close database.\n"));
		return false;
	}

	return true;
}

/*******************************************************************
 Delete the in-memory POSIX lock databases.
********************************************************************/

bool posix_locking_end(void)
{
	/*
	 * Shouldn't we close all fd's here?
	 */
	TALLOC_FREE(posix_pending_close_db);
	return true;
}
/****************************************************************************
 Next - the functions that deal with storing fd's that have outstanding
 POSIX locks when closed.
****************************************************************************/

/****************************************************************************
 The records in posix_pending_close_db are composed of an array of
 ints keyed by dev/ino pair. Those ints are the fd's that were open on
 this dev/ino pair that should have been closed, but can't as the lock
 ref count is non zero.
****************************************************************************/
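/*
 * For illustration only (the fd numbers are made up): if fds 31 and 45 were
 * both closed while Windows locks remained outstanding on the same dev/ino
 * pair, the record keyed by that file_id would hold the value { 31, 45 },
 * i.e. dsize == 2 * sizeof(int). Both fds are finally closed once the lock
 * ref count drops to zero (see fd_close_posix() below).
 */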
/****************************************************************************
 Keep a reference count of the number of Windows locks open on this dev/ino
 pair. Creates entry if it doesn't exist.
****************************************************************************/

static void increment_windows_lock_ref_count(files_struct *fsp)
{
	struct lock_ref_count_key tmp;
	int32_t lock_ref_count = 0;
	NTSTATUS status;

	status = dbwrap_change_int32_atomic(
		posix_pending_close_db, locking_ref_count_key_fsp(fsp, &tmp),
		&lock_ref_count, 1);

	SMB_ASSERT(NT_STATUS_IS_OK(status));
	SMB_ASSERT(lock_ref_count < INT32_MAX);

	DEBUG(10,("increment_windows_lock_ref_count for file now %s = %d\n",
		  fsp_str_dbg(fsp), (int)lock_ref_count));
}
/****************************************************************************
 Decrement the Windows lock ref count for this dev/ino pair by one.
****************************************************************************/

static void decrement_windows_lock_ref_count(files_struct *fsp)
{
	struct lock_ref_count_key tmp;
	int32_t lock_ref_count = 0;
	NTSTATUS status;

	status = dbwrap_change_int32_atomic(
		posix_pending_close_db, locking_ref_count_key_fsp(fsp, &tmp),
		&lock_ref_count, -1);

	SMB_ASSERT(NT_STATUS_IS_OK(status));
	SMB_ASSERT(lock_ref_count >= 0);

	DEBUG(10,("decrement_windows_lock_ref_count for file now %s = %d\n",
		  fsp_str_dbg(fsp), (int)lock_ref_count));
}
/****************************************************************************
 Fetch the lock ref count.
****************************************************************************/

static int32_t get_windows_lock_ref_count(files_struct *fsp)
{
	struct lock_ref_count_key tmp;
	NTSTATUS status;
	int32_t lock_ref_count = 0;

	status = dbwrap_fetch_int32(
		posix_pending_close_db, locking_ref_count_key_fsp(fsp, &tmp),
		&lock_ref_count);

	if (!NT_STATUS_IS_OK(status) &&
	    !NT_STATUS_EQUAL(status, NT_STATUS_NOT_FOUND)) {
		DEBUG(0, ("get_windows_lock_ref_count: Error fetching "
			  "lock ref count for file %s: %s\n",
			  fsp_str_dbg(fsp), nt_errstr(status)));
	}
	return lock_ref_count;
}

/****************************************************************************
 Delete a lock_ref_count entry.
****************************************************************************/

static void delete_windows_lock_ref_count(files_struct *fsp)
{
	struct lock_ref_count_key tmp;

	/* Not a bug if it doesn't exist - no locks were ever granted. */

	dbwrap_delete(posix_pending_close_db,
		      locking_ref_count_key_fsp(fsp, &tmp));

	DEBUG(10,("delete_windows_lock_ref_count for file %s\n",
		  fsp_str_dbg(fsp)));
}
/****************************************************************************
 Add an fd to the pending close tdb.
****************************************************************************/

static void add_fd_to_close_entry(files_struct *fsp)
{
	struct db_record *rec;
	int *fds;
	size_t num_fds;
	NTSTATUS status;
	TDB_DATA value;

	rec = dbwrap_fetch_locked(
		posix_pending_close_db, talloc_tos(),
		fd_array_key_fsp(fsp));

	SMB_ASSERT(rec != NULL);

	value = dbwrap_record_get_value(rec);
	SMB_ASSERT((value.dsize % sizeof(int)) == 0);

	num_fds = value.dsize / sizeof(int);
	fds = talloc_array(rec, int, num_fds+1);

	SMB_ASSERT(fds != NULL);

	memcpy(fds, value.dptr, value.dsize);
	fds[num_fds] = fsp->fh->fd;

	status = dbwrap_record_store(
		rec, make_tdb_data((uint8_t *)fds, talloc_get_size(fds)), 0);

	SMB_ASSERT(NT_STATUS_IS_OK(status));

	TALLOC_FREE(rec);

	DEBUG(10,("add_fd_to_close_entry: added fd %d file %s\n",
		  fsp->fh->fd, fsp_str_dbg(fsp)));
}
/****************************************************************************
 Remove all fd entries for a specific dev/inode pair from the tdb.
****************************************************************************/

static void delete_close_entries(files_struct *fsp)
{
	struct db_record *rec;

	rec = dbwrap_fetch_locked(
		posix_pending_close_db, talloc_tos(),
		fd_array_key_fsp(fsp));

	SMB_ASSERT(rec != NULL);
	dbwrap_record_delete(rec);
	TALLOC_FREE(rec);
}

/****************************************************************************
 Get the array of POSIX pending close records for an open fsp. Returns number
 of entries.
****************************************************************************/

static size_t get_posix_pending_close_entries(TALLOC_CTX *mem_ctx,
					      files_struct *fsp, int **entries)
{
	TDB_DATA dbuf;
	NTSTATUS status;

	status = dbwrap_fetch(
		posix_pending_close_db, mem_ctx, fd_array_key_fsp(fsp),
		&dbuf);

	if (NT_STATUS_EQUAL(status, NT_STATUS_NOT_FOUND)) {
		*entries = NULL;
		return 0;
	}

	SMB_ASSERT(NT_STATUS_IS_OK(status));

	if (dbuf.dsize == 0) {
		*entries = NULL;
		return 0;
	}

	*entries = (int *)dbuf.dptr;
	return (size_t)(dbuf.dsize / sizeof(int));
}
/****************************************************************************
 Deal with pending closes needed by POSIX locking support.
 Note that posix_locking_close_file() is expected to have been called
 to delete all locks on this fsp before this function is called.
****************************************************************************/

int fd_close_posix(struct files_struct *fsp)
{
	int saved_errno = 0;
	int ret;
	int *fd_array = NULL;
	size_t count, i;

	if (!lp_locking(fsp->conn->params) ||
	    !lp_posix_locking(fsp->conn->params))
	{
		/*
		 * No locking or POSIX to worry about or we want POSIX semantics
		 * which will lose all locks on all fd's open on this dev/inode,
		 * just close.
		 */
		return close(fsp->fh->fd);
	}

	if (get_windows_lock_ref_count(fsp)) {

		/*
		 * There are outstanding locks on this dev/inode pair on
		 * other fds. Add our fd to the pending close tdb and set
		 * fsp->fh->fd to -1.
		 */

		add_fd_to_close_entry(fsp);
		return 0;
	}

	/*
	 * No outstanding locks. Get the pending close fd's
	 * from the tdb and close them all.
	 */

	count = get_posix_pending_close_entries(talloc_tos(), fsp, &fd_array);

	if (count) {
		DEBUG(10,("fd_close_posix: doing close on %u fd's.\n",
			  (unsigned int)count));

		for(i = 0; i < count; i++) {
			if (close(fd_array[i]) == -1) {
				saved_errno = errno;
			}
		}

		/*
		 * Delete all fd's stored in the tdb
		 * for this dev/inode pair.
		 */

		delete_close_entries(fsp);
	}

	TALLOC_FREE(fd_array);

	/* Don't need a lock ref count on this dev/ino anymore. */
	delete_windows_lock_ref_count(fsp);

	/*
	 * Finally close the fd associated with this fsp.
	 */

	ret = close(fsp->fh->fd);

	if (ret == 0 && saved_errno != 0) {
		errno = saved_errno;
		ret = -1;
	}

	return ret;
}
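/*
 * Example flow through fd_close_posix() (the fd numbers are hypothetical):
 * fsp A (fd 20) and fsp B (fd 25) are open on the same dev/ino and A still
 * holds a Windows byte-range lock. Closing B takes the
 * get_windows_lock_ref_count() branch, so fd 25 is only parked in
 * posix_pending_close_db and 0 is returned. When A is closed later the ref
 * count has dropped to zero, so fd 25 is fetched from the db and closed
 * first, the pending close entries and the ref count record are deleted,
 * and finally fd 20 itself is closed. This works around the POSIX rule that
 * closing any fd drops all fcntl locks the process holds on that dev/ino.
 */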
/****************************************************************************
 Next - the functions that deal with the mapping CIFS Windows locks onto
 the underlying system POSIX locks.
****************************************************************************/

/*
 * Structure used when splitting a lock range
 * into a POSIX lock range. Doubly linked list.
 */

struct lock_list {
	struct lock_list *next;
	struct lock_list *prev;
	off_t start;
	off_t size;
};

/****************************************************************************
 Create a list of lock ranges that don't overlap a given range. Used in calculating
 POSIX locks and unlocks. This is a difficult function that requires ASCII art to
 understand it :-).
****************************************************************************/
static struct lock_list *posix_lock_list(TALLOC_CTX *ctx,
				struct lock_list *lhead,
				const struct lock_context *lock_ctx, /* Lock context lhead belongs to. */
				files_struct *fsp,
				const struct lock_struct *plocks,
				int num_locks)
{
	int i;

	/*
	 * Check the current lock list on this dev/inode pair.
	 * Quit if the list is deleted.
	 */

	DEBUG(10,("posix_lock_list: curr: start=%.0f,size=%.0f\n",
		(double)lhead->start, (double)lhead->size ));

	for (i=0; i<num_locks && lhead; i++) {
		const struct lock_struct *lock = &plocks[i];
		struct lock_list *l_curr;

		/* Ignore all but read/write locks. */
		if (lock->lock_type != READ_LOCK && lock->lock_type != WRITE_LOCK) {
			continue;
		}

		/* Ignore locks not owned by this process. */
		if (!serverid_equal(&lock->context.pid, &lock_ctx->pid)) {
			continue;
		}

		/*
		 * Walk the lock list, checking for overlaps. Note that
		 * the lock list can expand within this loop if the current
		 * range being examined needs to be split.
		 */

		for (l_curr = lhead; l_curr;) {

			DEBUG(10,("posix_lock_list: lock: fnum=%llu: start=%.0f,size=%.0f:type=%s",
				(unsigned long long)lock->fnum,
				(double)lock->start, (double)lock->size, posix_lock_type_name(lock->lock_type) ));

			if ( (l_curr->start >= (lock->start + lock->size)) ||
			     (lock->start >= (l_curr->start + l_curr->size))) {

				/* No overlap with existing lock - leave this range alone. */
/*********************************************
                                        +---------+
                                        | l_curr  |
                                        +---------+
                        +-------+
                        | lock  |
                        +-------+
OR....
        +---------+
        |  l_curr |
        +---------+
**********************************************/

				DEBUG(10,(" no overlap case.\n" ));

				l_curr = l_curr->next;

			} else if ( (l_curr->start >= lock->start) &&
				    (l_curr->start + l_curr->size <= lock->start + lock->size) ) {

				/*
				 * This range is completely overlapped by this existing lock range
				 * and thus should have no effect. Delete it from the list.
				 */
/*********************************************
                +---------+
                | l_curr  |
                +---------+
        +---------------------------+
        |       lock                |
        +---------------------------+
**********************************************/
				/* Save the next pointer */
				struct lock_list *ul_next = l_curr->next;

				DEBUG(10,(" delete case.\n" ));

				DLIST_REMOVE(lhead, l_curr);
				if(lhead == NULL) {
					break; /* No more list... */
				}

				l_curr = ul_next;

			} else if ( (l_curr->start >= lock->start) &&
				    (l_curr->start < lock->start + lock->size) &&
				    (l_curr->start + l_curr->size > lock->start + lock->size) ) {

				/*
				 * This range overlaps the existing lock range at the high end.
				 * Truncate by moving start to existing range end and reducing size.
				 */
/*********************************************
                +---------------+
                |   l_curr      |
                +---------------+
        +---------------+
        |    lock       |
        +---------------+
BECOMES....
                        +-------+
                        | l_curr|
                        +-------+
**********************************************/

				l_curr->size = (l_curr->start + l_curr->size) - (lock->start + lock->size);
				l_curr->start = lock->start + lock->size;

				DEBUG(10,(" truncate high case: start=%.0f,size=%.0f\n",
					(double)l_curr->start, (double)l_curr->size ));

				l_curr = l_curr->next;

			} else if ( (l_curr->start < lock->start) &&
				    (l_curr->start + l_curr->size > lock->start) &&
				    (l_curr->start + l_curr->size <= lock->start + lock->size) ) {

				/*
				 * This range overlaps the existing lock range at the low end.
				 * Truncate by reducing size.
				 */
/*********************************************
   +---------------+
   |   l_curr      |
   +---------------+
           +---------------+
           |    lock       |
           +---------------+
BECOMES....
   +-------+
   | l_curr|
   +-------+
**********************************************/

				l_curr->size = lock->start - l_curr->start;

				DEBUG(10,(" truncate low case: start=%.0f,size=%.0f\n",
					(double)l_curr->start, (double)l_curr->size ));

				l_curr = l_curr->next;

			} else if ( (l_curr->start < lock->start) &&
				    (l_curr->start + l_curr->size > lock->start + lock->size) ) {
				/*
				 * Worst case scenario. Range completely overlaps an existing
				 * lock range. Split the request into two, push the new (upper) request
				 * into the dlink list, and continue with the entry after l_new (as we
				 * know that l_new will not overlap with this lock).
				 */
/*********************************************
        +---------------------------+
        |        l_curr             |
        +---------------------------+
                +---------+
                |  lock   |
                +---------+
BECOMES.....
        +-------+         +---------+
        | l_curr|         |  l_new  |
        +-------+         +---------+
**********************************************/
				struct lock_list *l_new = talloc(ctx, struct lock_list);

				if(l_new == NULL) {
					DEBUG(0,("posix_lock_list: talloc fail.\n"));
					return NULL; /* The talloc_destroy takes care of cleanup. */
				}

				ZERO_STRUCTP(l_new);
				l_new->start = lock->start + lock->size;
				l_new->size = l_curr->start + l_curr->size - l_new->start;

				/* Truncate the l_curr. */
				l_curr->size = lock->start - l_curr->start;

				DEBUG(10,(" split case: curr: start=%.0f,size=%.0f \
new: start=%.0f,size=%.0f\n", (double)l_curr->start, (double)l_curr->size,
					(double)l_new->start, (double)l_new->size ));

				/*
				 * Add into the dlink list after the l_curr point - NOT at lhead.
				 */
				DLIST_ADD_AFTER(lhead, l_new, l_curr);

				/* And move after the link we added. */
				l_curr = l_new->next;

			} else {

				/*
				 * This logic case should never happen. Ensure this is the
				 * case by forcing an abort.... Remove in production.
				 */
				char *msg = NULL;

				if (asprintf(&msg, "logic flaw in cases: l_curr: start = %.0f, size = %.0f : \
lock: start = %.0f, size = %.0f", (double)l_curr->start, (double)l_curr->size, (double)lock->start, (double)lock->size ) != -1) {
					smb_panic(msg);
				} else {
					smb_panic("posix_lock_list");
				}
			}
		} /* end for ( l_curr = lhead; l_curr;) */
	} /* end for (i=0; i<num_locks && lhead; i++) */

	return lhead;
}
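/*
 * Worked example for posix_lock_list(): if lhead is the single range
 * { start=0, size=100 } and this process already holds one other lock
 * { start=10, size=20 } (i.e. bytes 10-29), the "split" case applies and
 * the returned list becomes { start=0, size=10 } -> { start=30, size=70 }.
 * Bytes 10-29 are deliberately left out so the existing POSIX lock survives
 * the subsequent lock/unlock calls.
 */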
/****************************************************************************
 POSIX function to acquire a lock. Returns True if the
 lock could be granted, False if not.
****************************************************************************/

bool set_posix_lock_windows_flavour(files_struct *fsp,
			uint64_t u_offset,
			uint64_t u_count,
			enum brl_type lock_type,
			const struct lock_context *lock_ctx,
			const struct lock_struct *plocks,
			int num_locks,
			int *errno_ret)
{
	off_t offset;
	off_t count;
	int posix_lock_type = map_posix_lock_type(fsp,lock_type);
	bool ret = True;
	size_t lock_count;
	TALLOC_CTX *l_ctx = NULL;
	struct lock_list *llist = NULL;
	struct lock_list *ll = NULL;

	DEBUG(5,("set_posix_lock_windows_flavour: File %s, offset = %.0f, "
		 "count = %.0f, type = %s\n", fsp_str_dbg(fsp),
		 (double)u_offset, (double)u_count,
		 posix_lock_type_name(lock_type)));

	/*
	 * If the requested lock won't fit in the POSIX range, we will
	 * pretend it was successful.
	 */

	if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
		increment_windows_lock_ref_count(fsp);
		return True;
	}

	/*
	 * Windows is very strange. It allows read locks to be overlaid
	 * (even over a write lock), but leaves the write lock in force until the first
	 * unlock. It also reference counts the locks. This means the following sequence :
	 *
	 * process1                                      process2
	 * ------------------------------------------------------------------------
	 * WRITE LOCK : start = 2, len = 10
	 *                                            READ LOCK: start =0, len = 10 - FAIL
	 * READ LOCK : start = 0, len = 14
	 *                                            READ LOCK: start =0, len = 10 - FAIL
	 * UNLOCK : start = 2, len = 10
	 *                                            READ LOCK: start =0, len = 10 - OK
	 *
	 * Under POSIX, the same sequence in steps 1 and 2 would not be reference counted, but
	 * would leave a single read lock over the 0-14 region.
	 */

	if ((l_ctx = talloc_init("set_posix_lock")) == NULL) {
		DEBUG(0,("set_posix_lock_windows_flavour: unable to init talloc context.\n"));
		return False;
	}

	if ((ll = talloc(l_ctx, struct lock_list)) == NULL) {
		DEBUG(0,("set_posix_lock_windows_flavour: unable to talloc unlock list.\n"));
		talloc_destroy(l_ctx);
		return False;
	}

	/*
	 * Create the initial list entry containing the
	 * lock we want to add.
	 */

	ZERO_STRUCTP(ll);
	ll->start = offset;
	ll->size = count;

	DLIST_ADD(llist, ll);

	/*
	 * The following call calculates if there are any
	 * overlapping locks held by this process on
	 * fd's open on the same file and splits this list
	 * into a list of lock ranges that do not overlap with existing
	 * POSIX locks.
	 */

	llist = posix_lock_list(l_ctx,
				llist,
				lock_ctx, /* Lock context llist belongs to. */
				fsp,
				plocks,
				num_locks);

	/*
	 * Add the POSIX locks on the list of ranges returned.
	 * As the lock is supposed to be added atomically, we need to
	 * back out all the locks if any one of these calls fails.
	 */

	for (lock_count = 0, ll = llist; ll; ll = ll->next, lock_count++) {
		offset = ll->start;
		count = ll->size;

		DEBUG(5,("set_posix_lock_windows_flavour: Real lock: Type = %s: offset = %.0f, count = %.0f\n",
			posix_lock_type_name(posix_lock_type), (double)offset, (double)count ));

		if (!posix_fcntl_lock(fsp,F_SETLK,offset,count,posix_lock_type)) {
			*errno_ret = errno;
			DEBUG(5,("set_posix_lock_windows_flavour: Lock fail !: Type = %s: offset = %.0f, count = %.0f. Errno = %s\n",
				posix_lock_type_name(posix_lock_type), (double)offset, (double)count, strerror(errno) ));
			ret = False;
			break;
		}
	}

	if (!ret) {

		/*
		 * Back out all the POSIX locks we have on fail.
		 */

		for (ll = llist; lock_count; ll = ll->next, lock_count--) {
			offset = ll->start;
			count = ll->size;

			DEBUG(5,("set_posix_lock_windows_flavour: Backing out locks: Type = %s: offset = %.0f, count = %.0f\n",
				posix_lock_type_name(posix_lock_type), (double)offset, (double)count ));

			posix_fcntl_lock(fsp,F_SETLK,offset,count,F_UNLCK);
		}
	} else {
		/* Remember the number of Windows locks we have on this dev/ino pair. */
		increment_windows_lock_ref_count(fsp);
	}

	talloc_destroy(l_ctx);
	return ret;
}
/****************************************************************************
 POSIX function to release a lock. Returns True if the
 lock could be released, False if not.
****************************************************************************/

bool release_posix_lock_windows_flavour(files_struct *fsp,
				uint64_t u_offset,
				uint64_t u_count,
				enum brl_type deleted_lock_type,
				const struct lock_context *lock_ctx,
				const struct lock_struct *plocks,
				int num_locks)
{
	off_t offset;
	off_t count;
	bool ret = True;
	TALLOC_CTX *ul_ctx = NULL;
	struct lock_list *ulist = NULL;
	struct lock_list *ul = NULL;

	DEBUG(5,("release_posix_lock_windows_flavour: File %s, offset = %.0f, "
		 "count = %.0f\n", fsp_str_dbg(fsp),
		 (double)u_offset, (double)u_count));

	/* Remember the number of Windows locks we have on this dev/ino pair. */
	decrement_windows_lock_ref_count(fsp);

	/*
	 * If the requested lock won't fit in the POSIX range, we will
	 * pretend it was successful.
	 */

	if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
		return True;
	}

	if ((ul_ctx = talloc_init("release_posix_lock")) == NULL) {
		DEBUG(0,("release_posix_lock_windows_flavour: unable to init talloc context.\n"));
		return False;
	}

	if ((ul = talloc(ul_ctx, struct lock_list)) == NULL) {
		DEBUG(0,("release_posix_lock_windows_flavour: unable to talloc unlock list.\n"));
		talloc_destroy(ul_ctx);
		return False;
	}

	/*
	 * Create the initial list entry containing the
	 * lock we want to remove.
	 */

	ZERO_STRUCTP(ul);
	ul->start = offset;
	ul->size = count;

	DLIST_ADD(ulist, ul);

	/*
	 * The following call calculates if there are any
	 * overlapping locks held by this process on
	 * fd's open on the same file and creates a
	 * list of unlock ranges that will allow
	 * POSIX lock ranges to remain on the file whilst the
	 * unlocks are performed.
	 */

	ulist = posix_lock_list(ul_ctx,
				ulist,
				lock_ctx, /* Lock context ulist belongs to. */
				fsp,
				plocks,
				num_locks);

	/*
	 * If there were any overlapped entries (list is > 1 or size or start have changed),
	 * and the lock_type we just deleted from
	 * the upper layer tdb was a write lock, then before doing the unlock we need to downgrade
	 * the POSIX lock to a read lock. This allows any overlapping read locks
	 * to be atomically maintained.
	 */
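	/*
	 * Example (mirroring the sequence described in
	 * set_posix_lock_windows_flavour above): this process took a Windows
	 * write lock at start=2,len=10 and then a read lock at start=0,len=14
	 * on the same fsp. When the write lock is released, posix_lock_list()
	 * strips the whole 2-11 range from ulist because the remaining 0-13
	 * read lock covers it, and the downgrade below re-locks 2-11 with
	 * F_RDLCK so the surviving Windows read lock keeps its POSIX backing.
	 */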
	if (deleted_lock_type == WRITE_LOCK &&
	    (!ulist || ulist->next != NULL || ulist->start != offset || ulist->size != count)) {

		DEBUG(5,("release_posix_lock_windows_flavour: downgrading lock to READ: offset = %.0f, count = %.0f\n",
			(double)offset, (double)count ));

		if (!posix_fcntl_lock(fsp,F_SETLK,offset,count,F_RDLCK)) {
			DEBUG(0,("release_posix_lock_windows_flavour: downgrade of lock failed with error %s !\n", strerror(errno) ));
			talloc_destroy(ul_ctx);
			return False;
		}
	}

	/*
	 * Release the POSIX locks on the list of ranges returned.
	 */

	for(; ulist; ulist = ulist->next) {
		offset = ulist->start;
		count = ulist->size;

		DEBUG(5,("release_posix_lock_windows_flavour: Real unlock: offset = %.0f, count = %.0f\n",
			(double)offset, (double)count ));

		if (!posix_fcntl_lock(fsp,F_SETLK,offset,count,F_UNLCK)) {
			ret = False;
		}
	}

	talloc_destroy(ul_ctx);
	return ret;
}
/****************************************************************************
 Next - the functions that deal with mapping CIFS POSIX locks onto
 the underlying system POSIX locks.
****************************************************************************/

/****************************************************************************
 POSIX function to acquire a lock. Returns True if the
 lock could be granted, False if not.
 As POSIX locks don't stack or conflict (they just overwrite)
 we can map the requested lock directly onto a system one. We
 know it doesn't conflict with locks on other contexts as the
 upper layer would have refused it.
****************************************************************************/

bool set_posix_lock_posix_flavour(files_struct *fsp,
			uint64_t u_offset,
			uint64_t u_count,
			enum brl_type lock_type,
			int *errno_ret)
{
	off_t offset;
	off_t count;
	int posix_lock_type = map_posix_lock_type(fsp,lock_type);

	DEBUG(5,("set_posix_lock_posix_flavour: File %s, offset = %ju, count "
		 "= %ju, type = %s\n", fsp_str_dbg(fsp),
		 (uintmax_t)u_offset, (uintmax_t)u_count,
		 posix_lock_type_name(lock_type)));

	/*
	 * If the requested lock won't fit in the POSIX range, we will
	 * pretend it was successful.
	 */

	if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
		return True;
	}

	if (!posix_fcntl_lock(fsp,F_SETLK,offset,count,posix_lock_type)) {
		*errno_ret = errno;
		DEBUG(5,("set_posix_lock_posix_flavour: Lock fail !: Type = %s: offset = %ju, count = %ju. Errno = %s\n",
			posix_lock_type_name(posix_lock_type), (uintmax_t)offset, (uintmax_t)count, strerror(errno) ));
		return False;
	}
	return True;
}
/****************************************************************************
 POSIX function to release a lock. Returns True if the
 lock could be released, False if not.
 We are given a complete lock state from the upper layer which is what the lock
 state should be after the unlock has already been done, so what
 we do is punch out holes in the unlock range where locks owned by this process
 have a different lock context.
****************************************************************************/
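/*
 * Example: unlocking the range start=0,size=100 while this process still
 * holds a lock at start=40,size=10 (for instance on another open handle)
 * leaves ulist as { start=0, size=40 } -> { start=50, size=50 }, so the
 * fcntl unlocks below never touch bytes 40-49 and that lock stays in place.
 */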
bool release_posix_lock_posix_flavour(files_struct *fsp,
				uint64_t u_offset,
				uint64_t u_count,
				const struct lock_context *lock_ctx,
				const struct lock_struct *plocks,
				int num_locks)
{
	bool ret = True;
	off_t offset;
	off_t count;
	TALLOC_CTX *ul_ctx = NULL;
	struct lock_list *ulist = NULL;
	struct lock_list *ul = NULL;

	DEBUG(5,("release_posix_lock_posix_flavour: File %s, offset = %.0f, "
		 "count = %.0f\n", fsp_str_dbg(fsp),
		 (double)u_offset, (double)u_count));

	/*
	 * If the requested lock won't fit in the POSIX range, we will
	 * pretend it was successful.
	 */

	if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
		return True;
	}

	if ((ul_ctx = talloc_init("release_posix_lock")) == NULL) {
		DEBUG(0,("release_posix_lock_posix_flavour: unable to init talloc context.\n"));
		return False;
	}

	if ((ul = talloc(ul_ctx, struct lock_list)) == NULL) {
		DEBUG(0,("release_posix_lock_posix_flavour: unable to talloc unlock list.\n"));
		talloc_destroy(ul_ctx);
		return False;
	}

	/*
	 * Create the initial list entry containing the
	 * lock we want to remove.
	 */

	ZERO_STRUCTP(ul);
	ul->start = offset;
	ul->size = count;

	DLIST_ADD(ulist, ul);

	/*
	 * Walk the given array creating a linked list
	 * of unlock requests.
	 */

	ulist = posix_lock_list(ul_ctx,
				ulist,
				lock_ctx, /* Lock context ulist belongs to. */
				fsp,
				plocks,
				num_locks);

	/*
	 * Release the POSIX locks on the list of ranges returned.
	 */

	for(; ulist; ulist = ulist->next) {
		offset = ulist->start;
		count = ulist->size;

		DEBUG(5,("release_posix_lock_posix_flavour: Real unlock: offset = %.0f, count = %.0f\n",
			(double)offset, (double)count ));

		if (!posix_fcntl_lock(fsp,F_SETLK,offset,count,F_UNLCK)) {
			ret = False;
		}
	}

	talloc_destroy(ul_ctx);
	return ret;
}