/* source3/locking/posix.c */
/*
   Unix SMB/CIFS implementation.
   Locking functions
   Copyright (C) Jeremy Allison 1992-2006

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.

   Revision History:

   POSIX locking support. Jeremy Allison (jeremy@valinux.com), Apr. 2000.
*/
#include "includes.h"
#include "system/filesys.h"
#include "locking/proto.h"
#include "dbwrap/dbwrap.h"
#include "dbwrap/dbwrap_rbt.h"
#include "util_tdb.h"

#undef DBGC_CLASS
#define DBGC_CLASS DBGC_LOCKING

/*
 * The pending close database handle.
 */

static struct db_context *posix_pending_close_db;

/****************************************************************************
 First - the functions that deal with the underlying system locks - these
 functions are used no matter if we're mapping CIFS Windows locks or CIFS
 POSIX locks onto POSIX.
****************************************************************************/

/****************************************************************************
 Utility function to map a lock type correctly depending on the open
 mode of a file.
****************************************************************************/

static int map_posix_lock_type( files_struct *fsp, enum brl_type lock_type)
{
	if ((lock_type == WRITE_LOCK) && !fsp->can_write) {
		/*
		 * Many UNIX's cannot get a write lock on a file opened read-only.
		 * Win32 locking semantics allow this.
		 * Do the best we can and attempt a read-only lock.
		 */
		DEBUG(10,("map_posix_lock_type: Downgrading write lock to read due to read-only file.\n"));
		return F_RDLCK;
	}

	/*
	 * This return should be the most normal, as we attempt
	 * to always open files read/write.
	 */

	return (lock_type == READ_LOCK) ? F_RDLCK : F_WRLCK;
}

/****************************************************************************
 Debugging aid :-).
****************************************************************************/

static const char *posix_lock_type_name(int lock_type)
{
	return (lock_type == F_RDLCK) ? "READ" : "WRITE";
}

/****************************************************************************
 Check to see if the given unsigned lock range is within the possible POSIX
 range. Modifies the given args to be in range if possible, just returns
 False if not.
****************************************************************************/

static bool posix_lock_in_range(off_t *offset_out, off_t *count_out,
				uint64_t u_offset, uint64_t u_count)
{
	off_t offset = (off_t)u_offset;
	off_t count = (off_t)u_count;

	/*
	 * For the type of system we are, attempt to
	 * find the maximum positive lock offset as an off_t.
	 */

#if defined(MAX_POSITIVE_LOCK_OFFSET) /* Some systems have arbitrary limits. */

	off_t max_positive_lock_offset = (MAX_POSITIVE_LOCK_OFFSET);

#else
	/*
	 * In this case off_t is 64 bits,
	 * and the underlying system can handle 64 bit signed locks.
	 */

	off_t mask2 = ((off_t)0x4) << (SMB_OFF_T_BITS-4);
	off_t mask = (mask2<<1);
	off_t max_positive_lock_offset = ~mask;

#endif
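
	/*
	 * For example, with a 64-bit off_t (SMB_OFF_T_BITS == 64) the
	 * arithmetic in the #else branch above works out as:
	 *
	 *   mask2 = (off_t)0x4 << 60         = 0x4000000000000000
	 *   mask  = mask2 << 1               = 0x8000000000000000 (the sign bit)
	 *   max_positive_lock_offset = ~mask = 0x7fffffffffffffff
	 *
	 * i.e. the largest positive value a signed 64-bit off_t can hold.
	 */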

	/*
	 * POSIX locks of length zero mean lock to end-of-file.
	 * Win32 locks of length zero are point probes. Ignore
	 * any Win32 locks of length zero. JRA.
	 */

	if (count == (off_t)0) {
		DEBUG(10,("posix_lock_in_range: count = 0, ignoring.\n"));
		return False;
	}

	/*
	 * If the given offset was > max_positive_lock_offset then we cannot
	 * map this at all - ignore this lock.
	 */

	if (u_offset & ~((uint64_t)max_positive_lock_offset)) {
		DEBUG(10,("posix_lock_in_range: (offset = %.0f) offset > %.0f and we cannot handle this. Ignoring lock.\n",
			(double)u_offset, (double)((uint64_t)max_positive_lock_offset) ));
		return False;
	}

	/*
	 * We must truncate the count to less than max_positive_lock_offset.
	 */

	if (u_count & ~((uint64_t)max_positive_lock_offset)) {
		count = max_positive_lock_offset;
	}

	/*
	 * Truncate count to end at max lock offset.
	 */

	if (offset + count < 0 || offset + count > max_positive_lock_offset) {
		count = max_positive_lock_offset - offset;
	}

	/*
	 * If we ate all the count, ignore this lock.
	 */

	if (count == 0) {
		DEBUG(10,("posix_lock_in_range: Count = 0. Ignoring lock u_offset = %.0f, u_count = %.0f\n",
			(double)u_offset, (double)u_count ));
		return False;
	}

	/*
	 * The mapping was successful.
	 */

	DEBUG(10,("posix_lock_in_range: offset_out = %.0f, count_out = %.0f\n",
		(double)offset, (double)count ));

	*offset_out = offset;
	*count_out = count;

	return True;
}
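
/*
 * Example (illustrative): a request to lock "everything" with u_offset = 0
 * and u_count = 0xFFFFFFFFFFFFFFFF passes through the truncation above and
 * comes out as offset = 0, count = max_positive_lock_offset, i.e. the
 * largest range a signed off_t can describe, rather than being rejected.
 */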

bool smb_vfs_call_lock(struct vfs_handle_struct *handle,
		       struct files_struct *fsp, int op, off_t offset,
		       off_t count, int type)
{
	VFS_FIND(lock);
	return handle->fns->lock_fn(handle, fsp, op, offset, count, type);
}
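
/*
 * For reference: with the default VFS backend a byte-range lock of this
 * shape ends up as a plain fcntl() byte-range lock. A minimal sketch of
 * what that amounts to (simplified, not the exact helper Samba routes
 * this through):
 *
 *	struct flock fl;
 *
 *	fl.l_type   = type;		// F_RDLCK, F_WRLCK or F_UNLCK
 *	fl.l_whence = SEEK_SET;
 *	fl.l_start  = offset;
 *	fl.l_len    = count;		// 0 means "to end of file"
 *	ret = (fcntl(fsp->fh->fd, op, &fl) == 0);	// op: F_SETLK or F_SETLKW
 */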

/****************************************************************************
 Actual function that does POSIX locks. Copes with 64 -> 32 bit cruft and
 broken NFS implementations.
****************************************************************************/

static bool posix_fcntl_lock(files_struct *fsp, int op, off_t offset, off_t count, int type)
{
	bool ret;

	DEBUG(8,("posix_fcntl_lock %d %d %.0f %.0f %d\n",fsp->fh->fd,op,(double)offset,(double)count,type));

	ret = SMB_VFS_LOCK(fsp, op, offset, count, type);

	if (!ret && ((errno == EFBIG) || (errno == ENOLCK) || (errno == EINVAL))) {

		DEBUG(0,("posix_fcntl_lock: WARNING: lock request at offset %.0f, length %.0f returned\n",
					(double)offset,(double)count));
		DEBUGADD(0,("an %s error. This can happen when using 64 bit lock offsets\n", strerror(errno)));
		DEBUGADD(0,("on 32 bit NFS mounted file systems.\n"));

		/*
		 * If the offset is > 0x7FFFFFFF then this will cause problems on
		 * 32 bit NFS mounted filesystems. Just ignore it.
		 */

		if (offset & ~((off_t)0x7fffffff)) {
			DEBUG(0,("Offset greater than 31 bits. Returning success.\n"));
			return True;
		}

		if (count & ~((off_t)0x7fffffff)) {
			/* 32 bit NFS file system, retry with smaller offset */
			DEBUG(0,("Count greater than 31 bits - retrying with 31 bit truncated length.\n"));
			errno = 0;
			count &= 0x7fffffff;
			ret = SMB_VFS_LOCK(fsp, op, offset, count, type);
		}
	}

	DEBUG(8,("posix_fcntl_lock: Lock call %s\n", ret ? "successful" : "failed"));
	return ret;
}
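
/*
 * Worked example of the fallback above (illustrative): a request at
 * offset = 0, count = 0xFFFFFFFF can fail with EFBIG/ENOLCK/EINVAL on a
 * 32 bit NFS mount. The offset fits in 31 bits but the count does not,
 * so the count is truncated to 0x7fffffff and the lock retried once;
 * had the offset itself exceeded 31 bits we would simply have claimed
 * success and set no lock at all.
 */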

bool smb_vfs_call_getlock(struct vfs_handle_struct *handle,
			  struct files_struct *fsp, off_t *poffset,
			  off_t *pcount, int *ptype, pid_t *ppid)
{
	VFS_FIND(getlock);
	return handle->fns->getlock_fn(handle, fsp, poffset, pcount, ptype,
				       ppid);
}

/****************************************************************************
 Actual function that gets POSIX locks. Copes with 64 -> 32 bit cruft and
 broken NFS implementations.
****************************************************************************/

static bool posix_fcntl_getlock(files_struct *fsp, off_t *poffset, off_t *pcount, int *ptype)
{
	pid_t pid;
	bool ret;

	DEBUG(8,("posix_fcntl_getlock %d %.0f %.0f %d\n",
		fsp->fh->fd,(double)*poffset,(double)*pcount,*ptype));

	ret = SMB_VFS_GETLOCK(fsp, poffset, pcount, ptype, &pid);

	if (!ret && ((errno == EFBIG) || (errno == ENOLCK) || (errno == EINVAL))) {

		DEBUG(0,("posix_fcntl_getlock: WARNING: lock request at offset %.0f, length %.0f returned\n",
					(double)*poffset,(double)*pcount));
		DEBUGADD(0,("an %s error. This can happen when using 64 bit lock offsets\n", strerror(errno)));
		DEBUGADD(0,("on 32 bit NFS mounted file systems.\n"));

		/*
		 * If the offset is > 0x7FFFFFFF then this will cause problems on
		 * 32 bit NFS mounted filesystems. Just ignore it.
		 */

		if (*poffset & ~((off_t)0x7fffffff)) {
			DEBUG(0,("Offset greater than 31 bits. Returning success.\n"));
			return True;
		}

		if (*pcount & ~((off_t)0x7fffffff)) {
			/* 32 bit NFS file system, retry with smaller offset */
			DEBUG(0,("Count greater than 31 bits - retrying with 31 bit truncated length.\n"));
			errno = 0;
			*pcount &= 0x7fffffff;
			ret = SMB_VFS_GETLOCK(fsp,poffset,pcount,ptype,&pid);
		}
	}

	DEBUG(8,("posix_fcntl_getlock: Lock query call %s\n", ret ? "successful" : "failed"));
	return ret;
}
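
/*
 * Note on the semantics relied on here (standard fcntl(F_GETLK) behaviour):
 * if no lock held by another process conflicts with the queried range, the
 * type is set to F_UNLCK and the range is left alone; otherwise the offset,
 * count and type are overwritten with the details of (one of) the
 * conflicting locks. is_posix_locked() below depends on exactly this.
 */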

/****************************************************************************
 POSIX function to see if a file region is locked. Returns True if the
 region is locked, False otherwise.
****************************************************************************/

bool is_posix_locked(files_struct *fsp,
			uint64_t *pu_offset,
			uint64_t *pu_count,
			enum brl_type *plock_type,
			enum brl_flavour lock_flav)
{
	off_t offset;
	off_t count;
	int posix_lock_type = map_posix_lock_type(fsp,*plock_type);

	DEBUG(10,("is_posix_locked: File %s, offset = %.0f, count = %.0f, "
		  "type = %s\n", fsp_str_dbg(fsp), (double)*pu_offset,
		  (double)*pu_count, posix_lock_type_name(*plock_type)));

	/*
	 * If the requested lock won't fit in the POSIX range, we will
	 * never set it, so presume it is not locked.
	 */

	if(!posix_lock_in_range(&offset, &count, *pu_offset, *pu_count)) {
		return False;
	}

	if (!posix_fcntl_getlock(fsp,&offset,&count,&posix_lock_type)) {
		return False;
	}

	if (posix_lock_type == F_UNLCK) {
		return False;
	}

	if (lock_flav == POSIX_LOCK) {
		/* Only POSIX lock queries need to know the details. */
		*pu_offset = (uint64_t)offset;
		*pu_count = (uint64_t)count;
		*plock_type = (posix_lock_type == F_RDLCK) ? READ_LOCK : WRITE_LOCK;
	}

	return True;
}

/****************************************************************************
 Next - the functions that deal with in memory database storing representations
 of either Windows CIFS locks or POSIX CIFS locks.
****************************************************************************/

/* The key used in the in-memory POSIX databases. */

struct lock_ref_count_key {
	struct file_id id;
	char r;
};

/*******************************************************************
 Form a static locking key for a dev/inode pair for the lock ref count.
******************************************************************/

static TDB_DATA locking_ref_count_key_fsp(files_struct *fsp,
					  struct lock_ref_count_key *tmp)
{
	ZERO_STRUCTP(tmp);
	tmp->id = fsp->file_id;
	tmp->r = 'r';
	return make_tdb_data((uint8_t *)tmp, sizeof(*tmp));
}

/*******************************************************************
 Convenience function to get an fd_array key from an fsp.
******************************************************************/

static TDB_DATA fd_array_key_fsp(files_struct *fsp)
{
	return make_tdb_data((uint8_t *)&fsp->file_id, sizeof(fsp->file_id));
}

/*******************************************************************
 Create the in-memory POSIX lock databases.
********************************************************************/

bool posix_locking_init(bool read_only)
{
	if (posix_pending_close_db != NULL) {
		return true;
	}

	posix_pending_close_db = db_open_rbt(NULL);

	if (posix_pending_close_db == NULL) {
		DEBUG(0,("Failed to open POSIX pending close database.\n"));
		return false;
	}

	return true;
}

/*******************************************************************
 Delete the in-memory POSIX lock databases.
********************************************************************/

bool posix_locking_end(void)
{
	/*
	 * Shouldn't we close all fd's here?
	 */
	TALLOC_FREE(posix_pending_close_db);
	return true;
}

/****************************************************************************
 Next - the functions that deal with storing fd's that have outstanding
 POSIX locks when closed.
****************************************************************************/

/****************************************************************************
 The records in posix_pending_close_db are composed of an array of
 ints keyed by dev/ino pair. Those ints are the fd's that were open on
 this dev/ino pair that should have been closed, but can't as the lock
 ref count is non zero.
****************************************************************************/
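
/*
 * To summarise the record layout: posix_pending_close_db holds two kinds
 * of records per dev/ino pair:
 *
 *   key = { file_id, 'r' }  ->  int32  (Windows lock ref count,
 *                                       see locking_ref_count_key_fsp())
 *   key = { file_id }       ->  int[]  (pending-close fds,
 *                                       see fd_array_key_fsp())
 */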

/****************************************************************************
 Keep a reference count of the number of Windows locks open on this dev/ino
 pair. Creates entry if it doesn't exist.
****************************************************************************/

static void increment_windows_lock_ref_count(files_struct *fsp)
{
	struct lock_ref_count_key tmp;
	int32_t lock_ref_count = 0;
	NTSTATUS status;

	status = dbwrap_change_int32_atomic(
		posix_pending_close_db, locking_ref_count_key_fsp(fsp, &tmp),
		&lock_ref_count, 1);

	SMB_ASSERT(NT_STATUS_IS_OK(status));
	SMB_ASSERT(lock_ref_count < INT32_MAX);

	DEBUG(10,("increment_windows_lock_ref_count for file now %s = %d\n",
		  fsp_str_dbg(fsp), (int)lock_ref_count));
}

/****************************************************************************
 Decrement the Windows lock ref count.
****************************************************************************/

static void decrement_windows_lock_ref_count(files_struct *fsp)
{
	struct lock_ref_count_key tmp;
	int32_t lock_ref_count = 0;
	NTSTATUS status;

	status = dbwrap_change_int32_atomic(
		posix_pending_close_db, locking_ref_count_key_fsp(fsp, &tmp),
		&lock_ref_count, -1);

	SMB_ASSERT(NT_STATUS_IS_OK(status));
	SMB_ASSERT(lock_ref_count >= 0);

	DEBUG(10,("decrement_windows_lock_ref_count for file now %s = %d\n",
		  fsp_str_dbg(fsp), (int)lock_ref_count));
}

/****************************************************************************
 Fetch the lock ref count.
****************************************************************************/

static int32_t get_windows_lock_ref_count(files_struct *fsp)
{
	struct lock_ref_count_key tmp;
	NTSTATUS status;
	int32_t lock_ref_count = 0;

	status = dbwrap_fetch_int32(
		posix_pending_close_db, locking_ref_count_key_fsp(fsp, &tmp),
		&lock_ref_count);

	if (!NT_STATUS_IS_OK(status) &&
	    !NT_STATUS_EQUAL(status, NT_STATUS_NOT_FOUND)) {
		DEBUG(0, ("get_windows_lock_ref_count: Error fetching "
			  "lock ref count for file %s: %s\n",
			  fsp_str_dbg(fsp), nt_errstr(status)));
	}
	return lock_ref_count;
}

/****************************************************************************
 Delete a lock_ref_count entry.
****************************************************************************/

static void delete_windows_lock_ref_count(files_struct *fsp)
{
	struct lock_ref_count_key tmp;

	/* Not a bug if it doesn't exist - no locks were ever granted. */

	dbwrap_delete(posix_pending_close_db,
		      locking_ref_count_key_fsp(fsp, &tmp));

	DEBUG(10,("delete_windows_lock_ref_count for file %s\n",
		  fsp_str_dbg(fsp)));
}

/****************************************************************************
 Add an fd to the pending close tdb.
****************************************************************************/

static void add_fd_to_close_entry(files_struct *fsp)
{
	struct db_record *rec;
	int *fds;
	size_t num_fds;
	NTSTATUS status;
	TDB_DATA value;

	rec = dbwrap_fetch_locked(
		posix_pending_close_db, talloc_tos(),
		fd_array_key_fsp(fsp));

	SMB_ASSERT(rec != NULL);

	value = dbwrap_record_get_value(rec);
	SMB_ASSERT((value.dsize % sizeof(int)) == 0);

	num_fds = value.dsize / sizeof(int);
	fds = talloc_array(rec, int, num_fds+1);

	SMB_ASSERT(fds != NULL);

	memcpy(fds, value.dptr, value.dsize);
	fds[num_fds] = fsp->fh->fd;

	status = dbwrap_record_store(
		rec, make_tdb_data((uint8_t *)fds, talloc_get_size(fds)), 0);

	SMB_ASSERT(NT_STATUS_IS_OK(status));

	TALLOC_FREE(rec);

	DEBUG(10,("add_fd_to_close_entry: added fd %d file %s\n",
		  fsp->fh->fd, fsp_str_dbg(fsp)));
}

/****************************************************************************
 Remove all fd entries for a specific dev/inode pair from the tdb.
****************************************************************************/

static void delete_close_entries(files_struct *fsp)
{
	struct db_record *rec;

	rec = dbwrap_fetch_locked(
		posix_pending_close_db, talloc_tos(),
		fd_array_key_fsp(fsp));

	SMB_ASSERT(rec != NULL);
	dbwrap_record_delete(rec);
	TALLOC_FREE(rec);
}

/****************************************************************************
 Get the array of POSIX pending close records for an open fsp. Returns number
 of entries.
****************************************************************************/

static size_t get_posix_pending_close_entries(TALLOC_CTX *mem_ctx,
					      files_struct *fsp, int **entries)
{
	TDB_DATA dbuf;
	NTSTATUS status;

	status = dbwrap_fetch(
		posix_pending_close_db, mem_ctx, fd_array_key_fsp(fsp),
		&dbuf);

	if (NT_STATUS_EQUAL(status, NT_STATUS_NOT_FOUND)) {
		*entries = NULL;
		return 0;
	}

	SMB_ASSERT(NT_STATUS_IS_OK(status));

	if (dbuf.dsize == 0) {
		*entries = NULL;
		return 0;
	}

	*entries = (int *)dbuf.dptr;
	return (size_t)(dbuf.dsize / sizeof(int));
}

/****************************************************************************
 Deal with pending closes needed by POSIX locking support.
 Note that posix_locking_close_file() is expected to have been called
 to delete all locks on this fsp before this function is called.
****************************************************************************/

int fd_close_posix(struct files_struct *fsp)
{
	int saved_errno = 0;
	int ret;
	int *fd_array = NULL;
	size_t count, i;

	if (!lp_locking(fsp->conn->params) ||
	    !lp_posix_locking(fsp->conn->params))
	{
		/*
		 * No locking or POSIX to worry about or we want POSIX semantics
		 * which will lose all locks on all fd's open on this dev/inode,
		 * just close.
		 */
		return close(fsp->fh->fd);
	}

	if (get_windows_lock_ref_count(fsp)) {

		/*
		 * There are outstanding locks on this dev/inode pair on
		 * other fds. Add our fd to the pending close tdb and set
		 * fsp->fh->fd to -1.
		 */

		add_fd_to_close_entry(fsp);
		return 0;
	}

	/*
	 * No outstanding locks. Get the pending close fd's
	 * from the tdb and close them all.
	 */

	count = get_posix_pending_close_entries(talloc_tos(), fsp, &fd_array);

	if (count) {
		DEBUG(10,("fd_close_posix: doing close on %u fd's.\n",
			  (unsigned int)count));

		for(i = 0; i < count; i++) {
			if (close(fd_array[i]) == -1) {
				saved_errno = errno;
			}
		}

		/*
		 * Delete all fd's stored in the tdb
		 * for this dev/inode pair.
		 */

		delete_close_entries(fsp);
	}

	TALLOC_FREE(fd_array);

	/* Don't need a lock ref count on this dev/ino anymore. */
	delete_windows_lock_ref_count(fsp);

	/*
	 * Finally close the fd associated with this fsp.
	 */

	ret = close(fsp->fh->fd);

	if (ret == 0 && saved_errno != 0) {
		errno = saved_errno;
		ret = -1;
	}

	return ret;
}
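
/*
 * Example of the interplay above (illustrative scenario): two fsps are open
 * on the same dev/ino pair and fsp1 holds a Windows lock, so the ref count
 * is 1. Closing fsp2 first takes the get_windows_lock_ref_count() branch:
 * its fd is stashed via add_fd_to_close_entry() and the real close(2) is
 * deferred, because closing it would drop this process's POSIX locks on the
 * file. Once fsp1's locks are released (ref count back to 0) and fsp1 is
 * closed, the stashed fd is closed along with fsp1's own fd and the ref
 * count record is deleted.
 */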

/****************************************************************************
 Next - the functions that deal with mapping CIFS Windows locks onto
 the underlying system POSIX locks.
****************************************************************************/

/*
 * Structure used when splitting a lock range
 * into a POSIX lock range. Doubly linked list.
 */

struct lock_list {
	struct lock_list *next;
	struct lock_list *prev;
	off_t start;
	off_t size;
};

/****************************************************************************
 Create a list of lock ranges that don't overlap a given range. Used in calculating
 POSIX locks and unlocks. This is a difficult function that requires ASCII art to
 understand it :-).
****************************************************************************/
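
/*
 * Worked example (illustrative): called with lhead = { start=0, size=100 }
 * and plocks containing one READ/WRITE lock owned by this process at
 * { start=20, size=30 } (i.e. held via another fd open on the same file),
 * the "split" case below applies and the function returns the two ranges
 * { start=0, size=20 } and { start=50, size=50 } - the original range with
 * the extent of the existing lock punched out.
 */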

static struct lock_list *posix_lock_list(TALLOC_CTX *ctx,
						struct lock_list *lhead,
						const struct lock_context *lock_ctx, /* Lock context lhead belongs to. */
						files_struct *fsp,
						const struct lock_struct *plocks,
						int num_locks)
{
	int i;

	/*
	 * Check the current lock list on this dev/inode pair.
	 * Quit if the list is deleted.
	 */

	DEBUG(10,("posix_lock_list: curr: start=%.0f,size=%.0f\n",
		(double)lhead->start, (double)lhead->size ));

	for (i=0; i<num_locks && lhead; i++) {
		const struct lock_struct *lock = &plocks[i];
		struct lock_list *l_curr;

		/* Ignore all but read/write locks. */
		if (lock->lock_type != READ_LOCK && lock->lock_type != WRITE_LOCK) {
			continue;
		}

		/* Ignore locks not owned by this process. */
		if (!serverid_equal(&lock->context.pid, &lock_ctx->pid)) {
			continue;
		}

		/*
		 * Walk the lock list, checking for overlaps. Note that
		 * the lock list can expand within this loop if the current
		 * range being examined needs to be split.
		 */

		for (l_curr = lhead; l_curr;) {

			DEBUG(10,("posix_lock_list: lock: fnum=%llu: start=%.0f,size=%.0f:type=%s",
				(unsigned long long)lock->fnum,
				(double)lock->start, (double)lock->size, posix_lock_type_name(lock->lock_type) ));

			if ( (l_curr->start >= (lock->start + lock->size)) ||
				 (lock->start >= (l_curr->start + l_curr->size))) {

				/* No overlap with existing lock - leave this range alone. */
/*********************************************
                                             +---------+
                                             | l_curr  |
                                             +---------+
                                +-------+
                                | lock  |
                                +-------+
OR....
             +---------+
             |  l_curr |
             +---------+
**********************************************/

				DEBUG(10,(" no overlap case.\n" ));

				l_curr = l_curr->next;

			} else if ( (l_curr->start >= lock->start) &&
						(l_curr->start + l_curr->size <= lock->start + lock->size) ) {

				/*
				 * This range is completely overlapped by this existing lock range
				 * and thus should have no effect. Delete it from the list.
				 */
/*********************************************
                +---------+
                | l_curr  |
                +---------+
        +---------------------------+
        |       lock                |
        +---------------------------+
**********************************************/
				/* Save the next pointer */
				struct lock_list *ul_next = l_curr->next;

				DEBUG(10,(" delete case.\n" ));

				DLIST_REMOVE(lhead, l_curr);
				if(lhead == NULL) {
					break; /* No more list... */
				}

				l_curr = ul_next;

			} else if ( (l_curr->start >= lock->start) &&
						(l_curr->start < lock->start + lock->size) &&
						(l_curr->start + l_curr->size > lock->start + lock->size) ) {

				/*
				 * This range overlaps the existing lock range at the high end.
				 * Truncate by moving start to existing range end and reducing size.
				 */
/*********************************************
                +---------------+
                | l_curr        |
                +---------------+
        +---------------+
        |    lock       |
        +---------------+
BECOMES....
                        +-------+
                        | l_curr|
                        +-------+
**********************************************/

				l_curr->size = (l_curr->start + l_curr->size) - (lock->start + lock->size);
				l_curr->start = lock->start + lock->size;

				DEBUG(10,(" truncate high case: start=%.0f,size=%.0f\n",
					(double)l_curr->start, (double)l_curr->size ));

				l_curr = l_curr->next;

			} else if ( (l_curr->start < lock->start) &&
						(l_curr->start + l_curr->size > lock->start) &&
						(l_curr->start + l_curr->size <= lock->start + lock->size) ) {

				/*
				 * This range overlaps the existing lock range at the low end.
				 * Truncate by reducing size.
				 */
/*********************************************
   +---------------+
   | l_curr        |
   +---------------+
           +---------------+
           |    lock       |
           +---------------+
BECOMES....
   +-------+
   | l_curr|
   +-------+
**********************************************/

				l_curr->size = lock->start - l_curr->start;

				DEBUG(10,(" truncate low case: start=%.0f,size=%.0f\n",
					(double)l_curr->start, (double)l_curr->size ));

				l_curr = l_curr->next;

			} else if ( (l_curr->start < lock->start) &&
						(l_curr->start + l_curr->size > lock->start + lock->size) ) {
				/*
				 * Worst case scenario. Range completely overlaps an existing
				 * lock range. Split the request into two, push the new (upper) request
				 * into the dlink list, and continue with the entry after l_new (as we
				 * know that l_new will not overlap with this lock).
				 */
/*********************************************
        +---------------------------+
        |        l_curr             |
        +---------------------------+
                +---------+
                |  lock   |
                +---------+
BECOMES.....
        +-------+         +---------+
        | l_curr|         |  l_new  |
        +-------+         +---------+
**********************************************/
				struct lock_list *l_new = talloc(ctx, struct lock_list);

				if(l_new == NULL) {
					DEBUG(0,("posix_lock_list: talloc fail.\n"));
					return NULL; /* The talloc_destroy takes care of cleanup. */
				}

				ZERO_STRUCTP(l_new);
				l_new->start = lock->start + lock->size;
				l_new->size = l_curr->start + l_curr->size - l_new->start;

				/* Truncate the l_curr. */
				l_curr->size = lock->start - l_curr->start;

				DEBUG(10,(" split case: curr: start=%.0f,size=%.0f \
new: start=%.0f,size=%.0f\n", (double)l_curr->start, (double)l_curr->size,
					(double)l_new->start, (double)l_new->size ));

				/*
				 * Add into the dlink list after the l_curr point - NOT at lhead.
				 */
				DLIST_ADD_AFTER(lhead, l_new, l_curr);

				/* And move after the link we added. */
				l_curr = l_new->next;

			} else {

				/*
				 * This logic case should never happen. Ensure this is the
				 * case by forcing an abort.... Remove in production.
				 */
				char *msg = NULL;

				if (asprintf(&msg, "logic flaw in cases: l_curr: start = %.0f, size = %.0f : \
lock: start = %.0f, size = %.0f", (double)l_curr->start, (double)l_curr->size, (double)lock->start, (double)lock->size ) != -1) {
					smb_panic(msg);
				} else {
					smb_panic("posix_lock_list");
				}
			}
		} /* end for ( l_curr = lhead; l_curr;) */
	} /* end for (i=0; i<num_locks && lhead; i++) */

	return lhead;
}

/****************************************************************************
 POSIX function to acquire a lock. Returns True if the
 lock could be granted, False if not.
****************************************************************************/

bool set_posix_lock_windows_flavour(files_struct *fsp,
			uint64_t u_offset,
			uint64_t u_count,
			enum brl_type lock_type,
			const struct lock_context *lock_ctx,
			const struct lock_struct *plocks,
			int num_locks,
			int *errno_ret)
{
	off_t offset;
	off_t count;
	int posix_lock_type = map_posix_lock_type(fsp,lock_type);
	bool ret = True;
	size_t lock_count;
	TALLOC_CTX *l_ctx = NULL;
	struct lock_list *llist = NULL;
	struct lock_list *ll = NULL;

	DEBUG(5,("set_posix_lock_windows_flavour: File %s, offset = %.0f, "
		 "count = %.0f, type = %s\n", fsp_str_dbg(fsp),
		 (double)u_offset, (double)u_count,
		 posix_lock_type_name(lock_type)));

	/*
	 * If the requested lock won't fit in the POSIX range, we will
	 * pretend it was successful.
	 */

	if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
		increment_windows_lock_ref_count(fsp);
		return True;
	}

	/*
	 * Windows is very strange. It allows read locks to be overlayed
	 * (even over a write lock), but leaves the write lock in force until the first
	 * unlock. It also reference counts the locks. This means the following sequence :
	 *
	 * process1                                      process2
	 * ------------------------------------------------------------------------
	 * WRITE LOCK : start = 2, len = 10
	 *                                            READ LOCK: start =0, len = 10 - FAIL
	 * READ LOCK : start = 0, len = 14
	 *                                            READ LOCK: start =0, len = 10 - FAIL
	 * UNLOCK : start = 2, len = 10
	 *                                            READ LOCK: start =0, len = 10 - OK
	 *
	 * Under POSIX, the same sequence in steps 1 and 2 would not be reference counted, but
	 * would leave a single read lock over the 0-14 region.
	 */

	if ((l_ctx = talloc_init("set_posix_lock")) == NULL) {
		DEBUG(0,("set_posix_lock_windows_flavour: unable to init talloc context.\n"));
		return False;
	}

	if ((ll = talloc(l_ctx, struct lock_list)) == NULL) {
		DEBUG(0,("set_posix_lock_windows_flavour: unable to talloc unlock list.\n"));
		talloc_destroy(l_ctx);
		return False;
	}

	/*
	 * Create the initial list entry containing the
	 * lock we want to add.
	 */

	ZERO_STRUCTP(ll);
	ll->start = offset;
	ll->size = count;

	DLIST_ADD(llist, ll);

	/*
	 * The following call calculates if there are any
	 * overlapping locks held by this process on
	 * fd's open on the same file and splits this list
	 * into a list of lock ranges that do not overlap with existing
	 * POSIX locks.
	 */

	llist = posix_lock_list(l_ctx,
				llist,
				lock_ctx, /* Lock context llist belongs to. */
				fsp,
				plocks,
				num_locks);

	/*
	 * Add the POSIX locks on the list of ranges returned.
	 * As the lock is supposed to be added atomically, we need to
	 * back out all the locks if any one of these calls fail.
	 */

	for (lock_count = 0, ll = llist; ll; ll = ll->next, lock_count++) {
		offset = ll->start;
		count = ll->size;

		DEBUG(5,("set_posix_lock_windows_flavour: Real lock: Type = %s: offset = %.0f, count = %.0f\n",
			posix_lock_type_name(posix_lock_type), (double)offset, (double)count ));

		if (!posix_fcntl_lock(fsp,F_SETLK,offset,count,posix_lock_type)) {
			*errno_ret = errno;
			DEBUG(5,("set_posix_lock_windows_flavour: Lock fail !: Type = %s: offset = %.0f, count = %.0f. Errno = %s\n",
				posix_lock_type_name(posix_lock_type), (double)offset, (double)count, strerror(errno) ));
			ret = False;
			break;
		}
	}

	if (!ret) {

		/*
		 * Back out all the POSIX locks we have on fail.
		 */

		for (ll = llist; lock_count; ll = ll->next, lock_count--) {
			offset = ll->start;
			count = ll->size;

			DEBUG(5,("set_posix_lock_windows_flavour: Backing out locks: Type = %s: offset = %.0f, count = %.0f\n",
				posix_lock_type_name(posix_lock_type), (double)offset, (double)count ));

			posix_fcntl_lock(fsp,F_SETLK,offset,count,F_UNLCK);
		}
	} else {
		/* Remember the number of Windows locks we have on this dev/ino pair. */
		increment_windows_lock_ref_count(fsp);
	}

	talloc_destroy(l_ctx);
	return ret;
}

/****************************************************************************
 POSIX function to release a lock. Returns True if the
 lock could be released, False if not.
****************************************************************************/

bool release_posix_lock_windows_flavour(files_struct *fsp,
				uint64_t u_offset,
				uint64_t u_count,
				enum brl_type deleted_lock_type,
				const struct lock_context *lock_ctx,
				const struct lock_struct *plocks,
				int num_locks)
{
	off_t offset;
	off_t count;
	bool ret = True;
	TALLOC_CTX *ul_ctx = NULL;
	struct lock_list *ulist = NULL;
	struct lock_list *ul = NULL;

	DEBUG(5,("release_posix_lock_windows_flavour: File %s, offset = %.0f, "
		 "count = %.0f\n", fsp_str_dbg(fsp),
		 (double)u_offset, (double)u_count));

	/* Remember the number of Windows locks we have on this dev/ino pair. */
	decrement_windows_lock_ref_count(fsp);

	/*
	 * If the requested lock won't fit in the POSIX range, we will
	 * pretend it was successful.
	 */

	if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
		return True;
	}

	if ((ul_ctx = talloc_init("release_posix_lock")) == NULL) {
		DEBUG(0,("release_posix_lock_windows_flavour: unable to init talloc context.\n"));
		return False;
	}

	if ((ul = talloc(ul_ctx, struct lock_list)) == NULL) {
		DEBUG(0,("release_posix_lock_windows_flavour: unable to talloc unlock list.\n"));
		talloc_destroy(ul_ctx);
		return False;
	}

	/*
	 * Create the initial list entry containing the
	 * lock we want to remove.
	 */

	ZERO_STRUCTP(ul);
	ul->start = offset;
	ul->size = count;

	DLIST_ADD(ulist, ul);

	/*
	 * The following call calculates if there are any
	 * overlapping locks held by this process on
	 * fd's open on the same file and creates a
	 * list of unlock ranges that will allow
	 * POSIX lock ranges to remain on the file whilst the
	 * unlocks are performed.
	 */

	ulist = posix_lock_list(ul_ctx,
				ulist,
				lock_ctx, /* Lock context ulist belongs to. */
				fsp,
				plocks,
				num_locks);

	/*
	 * If there were any overlapped entries (list is > 1 or size or start have changed),
	 * and the lock_type we just deleted from
	 * the upper layer tdb was a write lock, then before doing the unlock we need to downgrade
	 * the POSIX lock to a read lock. This allows any overlapping read locks
	 * to be atomically maintained.
	 */
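
	/*
	 * For instance (illustrative): we are deleting a write lock at
	 * offset=0, count=10 while this process still holds a read lock at
	 * start=5, size=10 on another fd. posix_lock_list() shrinks the
	 * unlock range to { 0, 5 }, so the list differs from the original
	 * range: we first downgrade the 0-10 POSIX lock to F_RDLCK and then
	 * unlock only 0-5, leaving the still-referenced 5-15 region covered
	 * by a read lock instead of losing it.
	 */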

	if (deleted_lock_type == WRITE_LOCK &&
			(!ulist || ulist->next != NULL || ulist->start != offset || ulist->size != count)) {

		DEBUG(5,("release_posix_lock_windows_flavour: downgrading lock to READ: offset = %.0f, count = %.0f\n",
			(double)offset, (double)count ));

		if (!posix_fcntl_lock(fsp,F_SETLK,offset,count,F_RDLCK)) {
			DEBUG(0,("release_posix_lock_windows_flavour: downgrade of lock failed with error %s !\n", strerror(errno) ));
			talloc_destroy(ul_ctx);
			return False;
		}
	}

	/*
	 * Release the POSIX locks on the list of ranges returned.
	 */

	for(; ulist; ulist = ulist->next) {
		offset = ulist->start;
		count = ulist->size;

		DEBUG(5,("release_posix_lock_windows_flavour: Real unlock: offset = %.0f, count = %.0f\n",
			(double)offset, (double)count ));

		if (!posix_fcntl_lock(fsp,F_SETLK,offset,count,F_UNLCK)) {
			ret = False;
		}
	}

	talloc_destroy(ul_ctx);
	return ret;
}

/****************************************************************************
 Next - the functions that deal with mapping CIFS POSIX locks onto
 the underlying system POSIX locks.
****************************************************************************/

/****************************************************************************
 POSIX function to acquire a lock. Returns True if the
 lock could be granted, False if not.
 As POSIX locks don't stack or conflict (they just overwrite)
 we can map the requested lock directly onto a system one. We
 know it doesn't conflict with locks on other contexts as the
 upper layer would have refused it.
****************************************************************************/

bool set_posix_lock_posix_flavour(files_struct *fsp,
			uint64_t u_offset,
			uint64_t u_count,
			enum brl_type lock_type,
			int *errno_ret)
{
	off_t offset;
	off_t count;
	int posix_lock_type = map_posix_lock_type(fsp,lock_type);

	DEBUG(5,("set_posix_lock_posix_flavour: File %s, offset = %.0f, count "
		 "= %.0f, type = %s\n", fsp_str_dbg(fsp),
		 (double)u_offset, (double)u_count,
		 posix_lock_type_name(lock_type)));

	/*
	 * If the requested lock won't fit in the POSIX range, we will
	 * pretend it was successful.
	 */

	if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
		return True;
	}

	if (!posix_fcntl_lock(fsp,F_SETLK,offset,count,posix_lock_type)) {
		*errno_ret = errno;
		DEBUG(5,("set_posix_lock_posix_flavour: Lock fail !: Type = %s: offset = %.0f, count = %.0f. Errno = %s\n",
			posix_lock_type_name(posix_lock_type), (double)offset, (double)count, strerror(errno) ));
		return False;
	}
	return True;
}

/****************************************************************************
 POSIX function to release a lock. Returns True if the
 lock could be released, False if not.
 We are given a complete lock state from the upper layer which is what the lock
 state should be after the unlock has already been done, so what
 we do is punch out holes in the unlock range where locks owned by this process
 have a different lock context.
****************************************************************************/

bool release_posix_lock_posix_flavour(files_struct *fsp,
				uint64_t u_offset,
				uint64_t u_count,
				const struct lock_context *lock_ctx,
				const struct lock_struct *plocks,
				int num_locks)
{
	bool ret = True;
	off_t offset;
	off_t count;
	TALLOC_CTX *ul_ctx = NULL;
	struct lock_list *ulist = NULL;
	struct lock_list *ul = NULL;

	DEBUG(5,("release_posix_lock_posix_flavour: File %s, offset = %.0f, "
		 "count = %.0f\n", fsp_str_dbg(fsp),
		 (double)u_offset, (double)u_count));

	/*
	 * If the requested lock won't fit in the POSIX range, we will
	 * pretend it was successful.
	 */

	if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
		return True;
	}

	if ((ul_ctx = talloc_init("release_posix_lock")) == NULL) {
		DEBUG(0,("release_posix_lock_posix_flavour: unable to init talloc context.\n"));
		return False;
	}

	if ((ul = talloc(ul_ctx, struct lock_list)) == NULL) {
		DEBUG(0,("release_posix_lock_posix_flavour: unable to talloc unlock list.\n"));
		talloc_destroy(ul_ctx);
		return False;
	}

	/*
	 * Create the initial list entry containing the
	 * lock we want to remove.
	 */

	ZERO_STRUCTP(ul);
	ul->start = offset;
	ul->size = count;

	DLIST_ADD(ulist, ul);

	/*
	 * Walk the given array creating a linked list
	 * of unlock requests.
	 */

	ulist = posix_lock_list(ul_ctx,
				ulist,
				lock_ctx, /* Lock context ulist belongs to. */
				fsp,
				plocks,
				num_locks);

	/*
	 * Release the POSIX locks on the list of ranges returned.
	 */

	for(; ulist; ulist = ulist->next) {
		offset = ulist->start;
		count = ulist->size;

		DEBUG(5,("release_posix_lock_posix_flavour: Real unlock: offset = %.0f, count = %.0f\n",
			(double)offset, (double)count ));

		if (!posix_fcntl_lock(fsp,F_SETLK,offset,count,F_UNLCK)) {
			ret = False;
		}
	}

	talloc_destroy(ul_ctx);
	return ret;
}