source/ntvfs/common/brlock.c

   1 /*
   2    Unix SMB/CIFS implementation.
   3
   4    generic byte range locking code
   5
   6    Copyright (C) Andrew Tridgell 1992-2004
   7    Copyright (C) Jeremy Allison 1992-2000
   8
   9    This program is free software; you can redistribute it and/or modify
  10    it under the terms of the GNU General Public License as published by
  11    the Free Software Foundation; either version 2 of the License, or
  12    (at your option) any later version.
  13
  14    This program is distributed in the hope that it will be useful,
  15    but WITHOUT ANY WARRANTY; without even the implied warranty of
  16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17    GNU General Public License for more details.
  18
  19    You should have received a copy of the GNU General Public License
  20    along with this program; if not, write to the Free Software
  21    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  22 */
  23
  24 /* This module implements a tdb based byte range locking service,
  25    replacing the fcntl() based byte range locking previously
  26    used. This allows us to provide the same semantics as NT */
  27
  28 #include "includes.h"
  29 #include "system/filesys.h"
  30 #include "lib/tdb/include/tdb.h"
  31 #include "messaging/messaging.h"
  32 #include "db_wrap.h"
  33 #include "lib/messaging/irpc.h"
  34
  35 /*
  36   in this module a "DATA_BLOB *file_key" is a blob that uniquely identifies
  37   a file. For a local posix filesystem this will usually be a combination
  38   of the device and inode numbers of the file, but it can be anything
  39   that uniquely identifies a file for locking purposes, as long
  40   as it is applied consistently.
  41 */
  42
  43 /*
  44   the lock context contains the elements that define whether one
  45   lock is the same as another lock
  46 */
  47 struct lock_context {
  48         uint32_t server;
  49         uint16_t smbpid;
  50         uint16_t tid;
  51 };
  52
  53 /* The data in brlock records is an unsorted linear array of these
  54    records.  It is unnecessary to store the count as tdb provides the
  55    size of the record */
  56 struct lock_struct {
  57         struct lock_context context;
  58         uint64_t start;
  59         uint64_t size;
  60         uint16_t fnum;
  61         enum brl_type lock_type;
  62         void *notify_ptr;
  63 };
  64
  65 struct brl_context {
  66         struct tdb_wrap *w;
  67         uint32_t server;
  68         uint16_t tid;
  69         struct messaging_context *messaging_ctx;
  70         struct lock_struct last_lock;
  71 };
  72
  73
  74 /*
  75   Open up the brlock.tdb database. Close it down using
  76   talloc_free(). We need the messaging_ctx to allow for
  77   pending lock notifications.
  78 */
  79 struct brl_context *brl_init(TALLOC_CTX *mem_ctx, uint32_t server, uint16_t tid,
  80                              struct messaging_context *messaging_ctx)
  81 {
  82         char *path;
  83         struct brl_context *brl;
  84
  85         brl = talloc(mem_ctx, struct brl_context);
  86         if (brl == NULL) {
  87                 return NULL;
  88         }
  89
  90         path = smbd_tmp_path(brl, "brlock.tdb");
  91         brl->w = tdb_wrap_open(brl, path, 0,
  92                                TDB_DEFAULT, O_RDWR|O_CREAT, 0600);
  93         talloc_free(path);
  94         if (brl->w == NULL) {
  95                 talloc_free(brl);
  96                 return NULL;
  97         }
  98
  99         brl->server = server;
 100         brl->tid = tid;
 101         brl->messaging_ctx = messaging_ctx;
 102         ZERO_STRUCT(brl->last_lock);
 103
 104         return brl;
 105 }
 106
 107
 108 /*
 109   see if two locking contexts are equal
 110 */
 111 static BOOL brl_same_context(struct lock_context *ctx1, struct lock_context *ctx2)
 112 {
 113         return (ctx1->server == ctx2->server &&
 114                 ctx1->smbpid == ctx2->smbpid &&
 115                 ctx1->tid == ctx2->tid);
 116 }
 117
 118 /*
 119   see if lck1 and lck2 overlap
 120 */
 121 static BOOL brl_overlap(struct lock_struct *lck1,
 122                         struct lock_struct *lck2)
 123 {
 124         /* this extra check is not redundent - it copes with locks
 125            that go beyond the end of 64 bit file space */
 126         if (lck1->size != 0 &&
 127             lck1->start == lck2->start &&
 128             lck1->size == lck2->size) {
 129                 return True;
 130         }
 131
 132         if (lck1->start >= (lck2->start+lck2->size) ||
 133             lck2->start >= (lck1->start+lck1->size)) {
 134                 return False;
 135         }
 136         return True;
 137 }
 138
 139 /*
 140  See if lock2 can be added when lock1 is in place.
 141 */
 142 static BOOL brl_conflict(struct lock_struct *lck1,
 143                          struct lock_struct *lck2)
 144 {
 145         /* pending locks don't conflict with anything */
 146         if (lck1->lock_type >= PENDING_READ_LOCK ||
 147             lck2->lock_type >= PENDING_READ_LOCK) {
 148                 return False;
 149         }
 150
 151         if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK) {
 152                 return False;
 153         }
 154
 155         if (brl_same_context(&lck1->context, &lck2->context) &&
 156             lck2->lock_type == READ_LOCK && lck1->fnum == lck2->fnum) {
 157                 return False;
 158         }
 159
 160         return brl_overlap(lck1, lck2);
 161 }
 162
 163
 164 /*
 165  Check to see if this lock conflicts, but ignore our own locks on the
 166  same fnum only.
 167 */
 168 static BOOL brl_conflict_other(struct lock_struct *lck1, struct lock_struct *lck2)
 169 {
 170         /* pending locks don't conflict with anything */
 171         if (lck1->lock_type >= PENDING_READ_LOCK ||
 172             lck2->lock_type >= PENDING_READ_LOCK) {
 173                 return False;
 174         }
 175
 176         if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK)
 177                 return False;
 178
 179         /*
 180          * note that incoming write calls conflict with existing READ
 181          * locks even if the context is the same. JRA. See LOCKTEST7
 182          * in smbtorture.
 183          */
 184         if (brl_same_context(&lck1->context, &lck2->context) &&
 185             lck1->fnum == lck2->fnum &&
 186             (lck2->lock_type == READ_LOCK || lck1->lock_type == WRITE_LOCK)) {
 187                 return False;
 188         }
 189
 190         return brl_overlap(lck1, lck2);
 191 }
 192
 193
 194 /*
 195   amazingly enough, w2k3 "remembers" whether the last lock failure
 196   is the same as this one and changes its error code. I wonder if any
 197   app depends on this?
 198 */
 199 static NTSTATUS brl_lock_failed(struct brl_context *brl, struct lock_struct *lock)
 200 {
 201         if (lock->context.server == brl->last_lock.context.server &&
 202             lock->context.tid == brl->last_lock.context.tid &&
 203             lock->fnum == brl->last_lock.fnum &&
 204             lock->start == brl->last_lock.start &&
 205             lock->size == brl->last_lock.size) {
 206                 return NT_STATUS_FILE_LOCK_CONFLICT;
 207         }
 208         brl->last_lock = *lock;
 209         if (lock->start >= 0xEF000000 &&
 210             (lock->start >> 63) == 0) {
 211                 /* amazing the little things you learn with a test
 212                    suite. Locks beyond this offset (as a 64 bit
 213                    number!) always generate the conflict error code,
 214                    unless the top bit is set */
 215                 return NT_STATUS_FILE_LOCK_CONFLICT;
 216         }
 217         return NT_STATUS_LOCK_NOT_GRANTED;
 218 }
 219
 220 /*
 221   Lock a range of bytes.  The lock_type can be a PENDING_*_LOCK, in
 222   which case a real lock is first tried, and if that fails then a
 223   pending lock is created. When the pending lock is triggered (by
 224   someone else closing an overlapping lock range) a messaging
 225   notification is sent, identified by the notify_ptr
 226 */
 227 NTSTATUS brl_lock(struct brl_context *brl,
 228                   DATA_BLOB *file_key,
 229                   uint16_t smbpid,
 230                   uint16_t fnum,
 231                   uint64_t start, uint64_t size,
 232                   enum brl_type lock_type,
 233                   void *notify_ptr)
 234 {
 235         TDB_DATA kbuf, dbuf;
 236         int count=0, i;
 237         struct lock_struct lock, *locks=NULL;
 238         NTSTATUS status;
 239
 240         kbuf.dptr = (char *)file_key->data;
 241         kbuf.dsize = file_key->length;
 242
 243         if (tdb_chainlock(brl->w->tdb, kbuf) != 0) {
 244                 return NT_STATUS_INTERNAL_DB_CORRUPTION;
 245         }
 246
 247         /* if this is a pending lock, then with the chainlock held we
 248            try to get the real lock. If we succeed then we don't need
 249            to make it pending. This prevents a possible race condition
 250            where the pending lock gets created after the lock that is
 251            preventing the real lock gets removed */
 252         if (lock_type >= PENDING_READ_LOCK) {
 253                 enum brl_type rw = (lock_type==PENDING_READ_LOCK? READ_LOCK : WRITE_LOCK);
 254                 status = brl_lock(brl, file_key, smbpid, fnum, start, size, rw, NULL);
 255                 if (NT_STATUS_IS_OK(status)) {
 256                         tdb_chainunlock(brl->w->tdb, kbuf);
 257                         return NT_STATUS_OK;
 258                 }
 259         }
 260
 261         dbuf = tdb_fetch(brl->w->tdb, kbuf);
 262
 263         lock.context.smbpid = smbpid;
 264         lock.context.server = brl->server;
 265         lock.context.tid = brl->tid;
 266         lock.start = start;
 267         lock.size = size;
 268         lock.fnum = fnum;
 269         lock.lock_type = lock_type;
 270         lock.notify_ptr = notify_ptr;
 271
 272         if (dbuf.dptr) {
 273                 /* there are existing locks - make sure they don't conflict */
 274                 locks = (struct lock_struct *)dbuf.dptr;
 275                 count = dbuf.dsize / sizeof(*locks);
 276                 for (i=0; i<count; i++) {
 277                         if (brl_conflict(&locks[i], &lock)) {
 278                                 status = brl_lock_failed(brl, &lock);
 279                                 goto fail;
 280                         }
 281                 }
 282         }
 283
 284         /* no conflicts - add it to the list of locks */
 285         locks = realloc_p(locks, struct lock_struct, count+1);
 286         if (!locks) {
 287                 status = NT_STATUS_NO_MEMORY;
 288                 goto fail;
 289         } else {
 290                 dbuf.dptr = (char *)locks;
 291         }
 292         locks[count] = lock;
 293         dbuf.dsize += sizeof(lock);
 294
 295         if (tdb_store(brl->w->tdb, kbuf, dbuf, TDB_REPLACE) != 0) {
 296                 status = NT_STATUS_INTERNAL_DB_CORRUPTION;
 297                 goto fail;
 298         }
 299
 300         free(dbuf.dptr);
 301         tdb_chainunlock(brl->w->tdb, kbuf);
 302
 303         /* the caller needs to know if the real lock was granted. If
 304            we have reached here then it must be a pending lock that
 305            was granted, so tell them the lock failed */
 306         if (lock_type >= PENDING_READ_LOCK) {
 307                 return brl_lock_failed(brl, &lock);
 308         }
 309
 310         return NT_STATUS_OK;
 311
 312  fail:
 313
 314         free(dbuf.dptr);
 315         tdb_chainunlock(brl->w->tdb, kbuf);
 316         return status;
 317 }
 318
 319
 320 /*
 321   we are removing a lock that might be holding up a pending lock. Scan for pending
 322   locks that cover this range and if we find any then notify the server that it should
 323   retry the lock
 324 */
 325 static void brl_notify_unlock(struct brl_context *brl,
 326                               struct lock_struct *locks, int count,
 327                               struct lock_struct *removed_lock)
 328 {
 329         int i, last_notice;
 330
 331         /* the last_notice logic is to prevent stampeding on a lock
 332            range. It prevents us sending hundreds of notifies on the
 333            same range of bytes. It doesn't prevent all possible
 334            stampedes, but it does prevent the most common problem */
 335         last_notice = -1;
 336
 337         for (i=0;i<count;i++) {
 338                 if (locks[i].lock_type >= PENDING_READ_LOCK &&
 339                     brl_overlap(&locks[i], removed_lock)) {
 340                         if (last_notice != -1 && brl_overlap(&locks[i], &locks[last_notice])) {
 341                                 continue;
 342                         }
 343                         if (locks[i].lock_type == PENDING_WRITE_LOCK) {
 344                                 last_notice = i;
 345                         }
 346                         messaging_send_ptr(brl->messaging_ctx, locks[i].context.server,
 347                                            MSG_BRL_RETRY, locks[i].notify_ptr);
 348                 }
 349         }
 350 }
 351
 352
 353 /*
 354   send notifications for all pending locks - the file is being closed by this
 355   user
 356 */
 357 static void brl_notify_all(struct brl_context *brl,
 358                            struct lock_struct *locks, int count)
 359 {
 360         int i;
 361         for (i=0;i<count;i++) {
 362                 if (locks->lock_type >= PENDING_READ_LOCK) {
 363                         brl_notify_unlock(brl, locks, count, &locks[i]);
 364                 }
 365         }
 366 }
 367
 368
 369
 370 /*
 371  Unlock a range of bytes.
 372 */
 373 NTSTATUS brl_unlock(struct brl_context *brl,
 374                     DATA_BLOB *file_key,
 375                     uint16_t smbpid,
 376                     uint16_t fnum,
 377                     uint64_t start, uint64_t size)
 378 {
 379         TDB_DATA kbuf, dbuf;
 380         int count, i;
 381         struct lock_struct *locks;
 382         struct lock_context context;
 383         NTSTATUS status;
 384
 385         kbuf.dptr = (char *)file_key->data;
 386         kbuf.dsize = file_key->length;
 387
 388         if (tdb_chainlock(brl->w->tdb, kbuf) != 0) {
 389                 return NT_STATUS_INTERNAL_DB_CORRUPTION;
 390         }
 391
 392         dbuf = tdb_fetch(brl->w->tdb, kbuf);
 393         if (!dbuf.dptr) {
 394                 tdb_chainunlock(brl->w->tdb, kbuf);
 395                 return NT_STATUS_RANGE_NOT_LOCKED;
 396         }
 397
 398         context.smbpid = smbpid;
 399         context.server = brl->server;
 400         context.tid = brl->tid;
 401
 402         /* there are existing locks - find a match */
 403         locks = (struct lock_struct *)dbuf.dptr;
 404         count = dbuf.dsize / sizeof(*locks);
 405
 406         for (i=0; i<count; i++) {
 407                 struct lock_struct *lock = &locks[i];
 408
 409                 if (brl_same_context(&lock->context, &context) &&
 410                     lock->fnum == fnum &&
 411                     lock->start == start &&
 412                     lock->size == size &&
 413                     lock->notify_ptr == NULL) {
 414                         /* found it - delete it */
 415                         if (count == 1) {
 416                                 if (tdb_delete(brl->w->tdb, kbuf) != 0) {
 417                                         status = NT_STATUS_INTERNAL_DB_CORRUPTION;
 418                                         goto fail;
 419                                 }
 420                         } else {
 421                                 struct lock_struct removed_lock = *lock;
 422                                 if (i < count-1) {
 423                                         memmove(&locks[i], &locks[i+1],
 424                                                 sizeof(*locks)*((count-1) - i));
 425                                 }
 426                                 count--;
 427
 428                                 /* send notifications for any relevant pending locks */
 429                                 brl_notify_unlock(brl, locks, count, &removed_lock);
 430
 431                                 dbuf.dsize = count * sizeof(*locks);
 432
 433                                 if (tdb_store(brl->w->tdb, kbuf, dbuf, TDB_REPLACE) != 0) {
 434                                         status = NT_STATUS_INTERNAL_DB_CORRUPTION;
 435                                         goto fail;
 436                                 }
 437                         }
 438
 439                         free(dbuf.dptr);
 440                         tdb_chainunlock(brl->w->tdb, kbuf);
 441                         return NT_STATUS_OK;
 442                 }
 443         }
 444
 445         /* we didn't find it */
 446         status = NT_STATUS_RANGE_NOT_LOCKED;
 447
 448  fail:
 449         free(dbuf.dptr);
 450         tdb_chainunlock(brl->w->tdb, kbuf);
 451         return status;
 452 }
 453
 454
 455 /*
 456   remove a pending lock. This is called when the caller has either
 457   given up trying to establish a lock or when they have succeeded in
 458   getting it. In either case they no longer need to be notified.
 459 */
 460 NTSTATUS brl_remove_pending(struct brl_context *brl,
 461                             DATA_BLOB *file_key,
 462                             void *notify_ptr)
 463 {
 464         TDB_DATA kbuf, dbuf;
 465         int count, i;
 466         struct lock_struct *locks;
 467         NTSTATUS status;
 468
 469         kbuf.dptr = (char *)file_key->data;
 470         kbuf.dsize = file_key->length;
 471
 472         if (tdb_chainlock(brl->w->tdb, kbuf) != 0) {
 473                 return NT_STATUS_INTERNAL_DB_CORRUPTION;
 474         }
 475
 476         dbuf = tdb_fetch(brl->w->tdb, kbuf);
 477         if (!dbuf.dptr) {
 478                 tdb_chainunlock(brl->w->tdb, kbuf);
 479                 return NT_STATUS_RANGE_NOT_LOCKED;
 480         }
 481
 482         /* there are existing locks - find a match */
 483         locks = (struct lock_struct *)dbuf.dptr;
 484         count = dbuf.dsize / sizeof(*locks);
 485
 486         for (i=0; i<count; i++) {
 487                 struct lock_struct *lock = &locks[i];
 488
 489                 if (lock->notify_ptr == notify_ptr &&
 490                     lock->context.server == brl->server) {
 491                         /* found it - delete it */
 492                         if (count == 1) {
 493                                 if (tdb_delete(brl->w->tdb, kbuf) != 0) {
 494                                         status = NT_STATUS_INTERNAL_DB_CORRUPTION;
 495                                         goto fail;
 496                                 }
 497                         } else {
 498                                 if (i < count-1) {
 499                                         memmove(&locks[i], &locks[i+1],
 500                                                 sizeof(*locks)*((count-1) - i));
 501                                 }
 502                                 count--;
 503                                 dbuf.dsize = count * sizeof(*locks);
 504                                 if (tdb_store(brl->w->tdb, kbuf, dbuf, TDB_REPLACE) != 0) {
 505                                         status = NT_STATUS_INTERNAL_DB_CORRUPTION;
 506                                         goto fail;
 507                                 }
 508                         }
 509
 510                         free(dbuf.dptr);
 511                         tdb_chainunlock(brl->w->tdb, kbuf);
 512                         return NT_STATUS_OK;
 513                 }
 514         }
 515
 516         /* we didn't find it */
 517         status = NT_STATUS_RANGE_NOT_LOCKED;
 518
 519  fail:
 520         free(dbuf.dptr);
 521         tdb_chainunlock(brl->w->tdb, kbuf);
 522         return status;
 523 }
 524
 525
 526 /*
 527   Test if we are allowed to perform IO on a region of an open file
 528 */
 529 NTSTATUS brl_locktest(struct brl_context *brl,
 530                       DATA_BLOB *file_key,
 531                       uint16_t fnum,
 532                       uint16_t smbpid,
 533                       uint64_t start, uint64_t size,
 534                       enum brl_type lock_type)
 535 {
 536         TDB_DATA kbuf, dbuf;
 537         int count, i;
 538         struct lock_struct lock, *locks;
 539
 540         kbuf.dptr = (char *)file_key->data;
 541         kbuf.dsize = file_key->length;
 542
 543         dbuf = tdb_fetch(brl->w->tdb, kbuf);
 544         if (dbuf.dptr == NULL) {
 545                 return NT_STATUS_OK;
 546         }
 547
 548         lock.context.smbpid = smbpid;
 549         lock.context.server = brl->server;
 550         lock.context.tid = brl->tid;
 551         lock.start = start;
 552         lock.size = size;
 553         lock.fnum = fnum;
 554         lock.lock_type = lock_type;
 555
 556         /* there are existing locks - make sure they don't conflict */
 557         locks = (struct lock_struct *)dbuf.dptr;
 558         count = dbuf.dsize / sizeof(*locks);
 559
 560         for (i=0; i<count; i++) {
 561                 if (brl_conflict_other(&locks[i], &lock)) {
 562                         free(dbuf.dptr);
 563                         return NT_STATUS_FILE_LOCK_CONFLICT;
 564                 }
 565         }
 566
 567         free(dbuf.dptr);
 568         return NT_STATUS_OK;
 569 }
 570
 571
 572 /*
 573  Remove any locks associated with a open file.
 574 */
 575 NTSTATUS brl_close(struct brl_context *brl,
 576                    DATA_BLOB *file_key, int fnum)
 577 {
 578         TDB_DATA kbuf, dbuf;
 579         int count, i, dcount=0;
 580         struct lock_struct *locks;
 581         NTSTATUS status;
 582
 583         kbuf.dptr = (char *)file_key->data;
 584         kbuf.dsize = file_key->length;
 585
 586         if (tdb_chainlock(brl->w->tdb, kbuf) != 0) {
 587                 return NT_STATUS_INTERNAL_DB_CORRUPTION;
 588         }
 589
 590         dbuf = tdb_fetch(brl->w->tdb, kbuf);
 591         if (!dbuf.dptr) {
 592                 tdb_chainunlock(brl->w->tdb, kbuf);
 593                 return NT_STATUS_OK;
 594         }
 595
 596         /* there are existing locks - remove any for this fnum */
 597         locks = (struct lock_struct *)dbuf.dptr;
 598         count = dbuf.dsize / sizeof(*locks);
 599
 600         for (i=0; i<count; i++) {
 601                 struct lock_struct *lock = &locks[i];
 602
 603                 if (lock->context.tid == brl->tid &&
 604                     lock->context.server == brl->server &&
 605                     lock->fnum == fnum) {
 606                         /* found it - delete it */
 607                         if (count > 1 && i < count-1) {
 608                                 memmove(&locks[i], &locks[i+1],
 609                                         sizeof(*locks)*((count-1) - i));
 610                         }
 611                         count--;
 612                         i--;
 613                         dcount++;
 614                 }
 615         }
 616
 617         status = NT_STATUS_OK;
 618
 619         if (count == 0) {
 620                 if (tdb_delete(brl->w->tdb, kbuf) != 0) {
 621                         status = NT_STATUS_INTERNAL_DB_CORRUPTION;
 622                 }
 623         } else if (dcount != 0) {
 624                 /* tell all pending lock holders for this file that
 625                    they have a chance now. This is a bit indiscriminant,
 626                    but works OK */
 627                 brl_notify_all(brl, locks, count);
 628
 629                 dbuf.dsize = count * sizeof(*locks);
 630
 631                 if (tdb_store(brl->w->tdb, kbuf, dbuf, TDB_REPLACE) != 0) {
 632                         status = NT_STATUS_INTERNAL_DB_CORRUPTION;
 633                 }
 634         }
 635
 636         free(dbuf.dptr);
 637         tdb_chainunlock(brl->w->tdb, kbuf);
 638
 639         return status;
 640 }
 641