s3:g_lock: remove a nested event loop, replacing the inner loop by select
[Samba.git] / source3 / lib / g_lock.c
blob 8a44b22e11fb78e5b37d93fbbf7ffb71d8e86274
/*
   Unix SMB/CIFS implementation.
   global locks based on dbwrap and messaging
   Copyright (C) 2009 by Volker Lendecke

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

#include "includes.h"
#include "g_lock.h"

static NTSTATUS g_lock_force_unlock(struct g_lock_ctx *ctx, const char *name,
				    struct server_id pid);

struct g_lock_ctx {
	struct db_context *db;
	struct messaging_context *msg;
};

/*
 * The "g_lock.tdb" file contains records, indexed by the 0-terminated
 * lockname. The record contains an array of "struct g_lock_rec"
 * structures. Waiters have the lock_type with G_LOCK_PENDING or'ed.
 */

struct g_lock_rec {
	enum g_lock_type lock_type;
	struct server_id pid;
};

struct g_lock_ctx *g_lock_ctx_init(TALLOC_CTX *mem_ctx,
				   struct messaging_context *msg)
{
	struct g_lock_ctx *result;

	result = talloc(mem_ctx, struct g_lock_ctx);
	if (result == NULL) {
		return NULL;
	}
	result->msg = msg;

	result->db = db_open(result, lock_path("g_lock.tdb"), 0,
			     TDB_CLEAR_IF_FIRST, O_RDWR|O_CREAT, 0700);
	if (result->db == NULL) {
		DEBUG(1, ("g_lock_init: Could not open g_lock.tdb\n"));
		TALLOC_FREE(result);
		return NULL;
	}
	return result;
}

static bool g_lock_conflicts(enum g_lock_type lock_type,
			     const struct g_lock_rec *rec)
{
	enum g_lock_type rec_lock = rec->lock_type;

	if ((rec_lock & G_LOCK_PENDING) != 0) {
		return false;
	}

	/*
	 * Only tested write locks so far. Very likely this routine
	 * needs to be fixed for read locks....
	 */
	if ((lock_type == G_LOCK_READ) && (rec_lock == G_LOCK_READ)) {
		return false;
	}
	return true;
}

static bool g_lock_parse(TALLOC_CTX *mem_ctx, TDB_DATA data,
			 int *pnum_locks, struct g_lock_rec **plocks)
{
	int i, num_locks;
	struct g_lock_rec *locks;

	if ((data.dsize % sizeof(struct g_lock_rec)) != 0) {
		DEBUG(1, ("invalid lock record length %d\n", (int)data.dsize));
		return false;
	}

	num_locks = data.dsize / sizeof(struct g_lock_rec);
	locks = talloc_array(mem_ctx, struct g_lock_rec, num_locks);
	if (locks == NULL) {
		DEBUG(1, ("talloc failed\n"));
		return false;
	}

	memcpy(locks, data.dptr, data.dsize);

	DEBUG(10, ("locks:\n"));
	for (i=0; i<num_locks; i++) {
		DEBUGADD(10, ("%s: %s %s\n",
			      procid_str(talloc_tos(), &locks[i].pid),
			      ((locks[i].lock_type & 1) == G_LOCK_READ) ?
			      "read" : "write",
			      (locks[i].lock_type & G_LOCK_PENDING) ?
			      "(pending)" : "(owner)"));

		if (process_exists(locks[i].pid)) {
			continue;
		}
		DEBUGADD(10, ("%s does not exist -- discarding\n",
			      procid_str(talloc_tos(), &locks[i].pid)));

		if (i < (num_locks-1)) {
			locks[i] = locks[num_locks-1];
		}
		num_locks -= 1;
	}

	*plocks = locks;
	*pnum_locks = num_locks;
	return true;
}

static struct g_lock_rec *g_lock_addrec(TALLOC_CTX *mem_ctx,
					struct g_lock_rec *locks,
					int num_locks,
					const struct server_id pid,
					enum g_lock_type lock_type)
{
	struct g_lock_rec *result;

	result = talloc_realloc(mem_ctx, locks, struct g_lock_rec,
				num_locks+1);
	if (result == NULL) {
		return NULL;
	}

	result[num_locks].pid = pid;
	result[num_locks].lock_type = lock_type;
	return result;
}

static void g_lock_got_retry(struct messaging_context *msg,
			     void *private_data,
			     uint32_t msg_type,
			     struct server_id server_id,
			     DATA_BLOB *data);
static void g_lock_timedout(struct tevent_context *ev,
			    struct tevent_timer *te,
			    struct timeval current_time,
			    void *private_data);

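/*
 * Make a single attempt to get the lock "name": add or update our own
 * record in the lock's record array, force-remove records of dead lock
 * holders along the way, and return STATUS_PENDING if somebody else
 * still holds a conflicting lock.
 */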
static NTSTATUS g_lock_trylock(struct g_lock_ctx *ctx, const char *name,
			       enum g_lock_type lock_type)
{
	struct db_record *rec = NULL;
	struct g_lock_rec *locks = NULL;
	int i, num_locks;
	struct server_id self;
	int our_index;
	TDB_DATA data;
	NTSTATUS status = NT_STATUS_OK;
	NTSTATUS store_status;

again:
	rec = ctx->db->fetch_locked(ctx->db, talloc_tos(),
				    string_term_tdb_data(name));
	if (rec == NULL) {
		DEBUG(10, ("fetch_locked(\"%s\") failed\n", name));
		status = NT_STATUS_LOCK_NOT_GRANTED;
		goto done;
	}

	if (!g_lock_parse(talloc_tos(), rec->value, &num_locks, &locks)) {
		DEBUG(10, ("g_lock_parse for %s failed\n", name));
		status = NT_STATUS_INTERNAL_ERROR;
		goto done;
	}

	self = procid_self();
	our_index = -1;

	for (i=0; i<num_locks; i++) {
		if (procid_equal(&self, &locks[i].pid)) {
			if (our_index != -1) {
				DEBUG(1, ("g_lock_trylock: Added ourself "
					  "twice!\n"));
				status = NT_STATUS_INTERNAL_ERROR;
				goto done;
			}
			if ((locks[i].lock_type & G_LOCK_PENDING) == 0) {
				DEBUG(1, ("g_lock_trylock: Found ourself not "
					  "pending!\n"));
				status = NT_STATUS_INTERNAL_ERROR;
				goto done;
			}

			our_index = i;

			/* never conflict with ourself */
			continue;
		}
		if (g_lock_conflicts(lock_type, &locks[i])) {
			struct server_id pid = locks[i].pid;

			if (!process_exists(pid)) {
				TALLOC_FREE(locks);
				TALLOC_FREE(rec);
				status = g_lock_force_unlock(ctx, name, pid);
				if (!NT_STATUS_IS_OK(status)) {
					DEBUG(1, ("Could not unlock dead lock "
						  "holder!\n"));
					goto done;
				}
				goto again;
			}
			lock_type |= G_LOCK_PENDING;
		}
	}

	if (our_index == -1) {
		/* First round, add ourself */

		locks = g_lock_addrec(talloc_tos(), locks, num_locks,
				      self, lock_type);
		if (locks == NULL) {
			DEBUG(10, ("g_lock_addrec failed\n"));
			status = NT_STATUS_NO_MEMORY;
			goto done;
		}
	} else {
		/*
		 * Retry. We were pending last time. Overwrite the
		 * stored lock_type with what we calculated, we might
		 * have acquired the lock this time.
		 */
		locks[our_index].lock_type = lock_type;
	}

	data = make_tdb_data((uint8_t *)locks, talloc_get_size(locks));
	store_status = rec->store(rec, data, 0);
	if (!NT_STATUS_IS_OK(store_status)) {
		DEBUG(1, ("rec->store failed: %s\n",
			  nt_errstr(store_status)));
		status = store_status;
	}

done:
	TALLOC_FREE(locks);
	TALLOC_FREE(rec);

	if (NT_STATUS_IS_OK(status) && (lock_type & G_LOCK_PENDING) != 0) {
		return STATUS_PENDING;
	}

	return NT_STATUS_OK;
}

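/*
 * Get the lock "name", waiting up to "timeout" for it. While we are
 * pending, we wait for MSG_DBWRAP_G_LOCK_RETRY by selecting on the
 * ctdb connection fd (in the clustered case) with the remaining
 * timeout; a readable fd, a signal (EINTR) or an unexpired select
 * timeout each lead to another g_lock_trylock() attempt.
 */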
NTSTATUS g_lock_lock(struct g_lock_ctx *ctx, const char *name,
		     enum g_lock_type lock_type, struct timeval timeout)
{
	struct tevent_timer *te = NULL;
	NTSTATUS status;
	bool retry = false;
	struct timeval timeout_end;
	struct timeval timeout_remaining;
	struct timeval time_now;

	DEBUG(10, ("Trying to acquire lock %d for %s\n", (int)lock_type,
		   name));

	if (lock_type & ~1) {
		DEBUG(1, ("Got invalid lock type %d for %s\n",
			  (int)lock_type, name));
		return NT_STATUS_INVALID_PARAMETER;
	}

#ifdef CLUSTER_SUPPORT
	if (lp_clustering()) {
		status = ctdb_watch_us(messaging_ctdbd_connection());
		if (!NT_STATUS_IS_OK(status)) {
			DEBUG(10, ("could not register retry with ctdb: %s\n",
				   nt_errstr(status)));
			goto done;
		}
	}
#endif

	status = messaging_register(ctx->msg, &retry, MSG_DBWRAP_G_LOCK_RETRY,
				    g_lock_got_retry);
	if (!NT_STATUS_IS_OK(status)) {
		DEBUG(10, ("messaging_register failed: %s\n",
			   nt_errstr(status)));
		return status;
	}

	time_now = timeval_current();
	timeout_end = timeval_sum(&time_now, &timeout);

	while (true) {
		fd_set _r_fds;
		fd_set *r_fds = NULL;
		int max_fd = 0;
		int ret;

		status = g_lock_trylock(ctx, name, lock_type);
		if (NT_STATUS_IS_OK(status)) {
			DEBUG(10, ("Got lock %s\n", name));
			break;
		}
		if (!NT_STATUS_EQUAL(status, STATUS_PENDING)) {
			DEBUG(10, ("g_lock_trylock failed: %s\n",
				   nt_errstr(status)));
			break;
		}

		DEBUG(10, ("g_lock_trylock: Did not get lock, waiting...\n"));

		/* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
		 * !!! HACK ALERT --- FIX ME !!!
		 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
		 * What we really want to do here is to react to
		 * MSG_DBWRAP_G_LOCK_RETRY messages that are either sent
		 * by a client doing g_lock_unlock or by ourselves when
		 * we receive a CTDB_SRVID_SAMBA_NOTIFY or
		 * CTDB_SRVID_RECONFIGURE message from ctdbd, i.e. when
		 * either a client holding a lock or a complete node
		 * has died.
		 *
		 * Doing this properly involves calling tevent_loop_once(),
		 * but doing this here with the main ctdbd messaging context
		 * creates a nested event loop when g_lock_lock() is called
		 * from the main event loop, e.g. in a tcon_and_X where the
		 * share_info.tdb needs to be initialized and is locked by
		 * another process, or when the remote registry is accessed
		 * for writing and some other process already holds a lock
		 * on the registry.tdb.
		 *
		 * So as a quick fix, we act a little coarsely here: we do
		 * a select on the ctdb connection fd and when it is readable
		 * or we get EINTR, then we retry without actually parsing
		 * any ctdb packets or dispatching messages. This means that
		 * we retry more often than intended by design, but this does
		 * no harm and it is unobtrusive. When we have finished,
		 * the main loop will pick up all the messages and ctdb
		 * packets. The only extra twist is that we cannot use timed
		 * events here but have to handcode a timeout.
		 */

#ifdef CLUSTER_SUPPORT
		if (lp_clustering()) {
			struct ctdbd_connection *conn = messaging_ctdbd_connection();

			r_fds = &_r_fds;
			FD_ZERO(r_fds);
			max_fd = ctdbd_conn_get_fd(conn);
			FD_SET(max_fd, r_fds);
		}
#endif

		time_now = timeval_current();
		timeout_remaining = timeval_until(&time_now, &timeout_end);

		ret = sys_select(max_fd + 1, r_fds, NULL, NULL,
				 &timeout_remaining);

		if (ret == -1) {
			if (errno != EINTR) {
				DEBUG(1, ("error calling select: %s\n",
					  strerror(errno)));
				status = NT_STATUS_INTERNAL_ERROR;
				break;
			}
			/*
			 * errno == EINTR:
			 * This means a signal was received.
			 * It might have been a MSG_DBWRAP_G_LOCK_RETRY message.
			 * ==> retry
			 */
		} else if (ret == 0) {
			if (timeval_expired(&timeout_end)) {
				DEBUG(10, ("g_lock_lock timed out\n"));
				status = NT_STATUS_LOCK_NOT_GRANTED;
				break;
			} else {
				DEBUG(10, ("select returned 0 but timeout not "
					   "expired: strange - retrying\n"));
			}
		} else if (ret != 1) {
			DEBUG(1, ("invalid return code of select: %d\n", ret));
			status = NT_STATUS_INTERNAL_ERROR;
			break;
		}
		/*
		 * ret == 1:
		 * This means ctdbd has sent us some data.
		 * Might be a CTDB_SRVID_RECONFIGURE or a
		 * CTDB_SRVID_SAMBA_NOTIFY message.
		 * ==> retry
		 */
	}

done:

	if (!NT_STATUS_IS_OK(status)) {
		NTSTATUS unlock_status;

		unlock_status = g_lock_unlock(ctx, name);

		if (!NT_STATUS_IS_OK(unlock_status)) {
			DEBUG(1, ("Could not remove ourself from the locking "
				  "db: %s\n", nt_errstr(unlock_status)));
		}
	}

	messaging_deregister(ctx->msg, MSG_DBWRAP_G_LOCK_RETRY, &retry);
	TALLOC_FREE(te);

	return status;
}

static void g_lock_got_retry(struct messaging_context *msg,
			     void *private_data,
			     uint32_t msg_type,
			     struct server_id server_id,
			     DATA_BLOB *data)
{
	bool *pretry = (bool *)private_data;

	DEBUG(10, ("Got retry message from pid %s\n",
		   procid_str(talloc_tos(), &server_id)));

	*pretry = true;
}

static void g_lock_timedout(struct tevent_context *ev,
			    struct tevent_timer *te,
			    struct timeval current_time,
			    void *private_data)
{
	bool *ptimedout = (bool *)private_data;
	*ptimedout = true;
	TALLOC_FREE(te);
}

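/*
 * Remove the lock record of "pid" from the record array of "name".
 * If "pid" was the lock holder (not just pending), ping all remaining
 * waiters with MSG_DBWRAP_G_LOCK_RETRY so they re-attempt the lock.
 */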
static NTSTATUS g_lock_force_unlock(struct g_lock_ctx *ctx, const char *name,
				    struct server_id pid)
{
	struct db_record *rec = NULL;
	struct g_lock_rec *locks = NULL;
	int i, num_locks;
	enum g_lock_type lock_type;
	NTSTATUS status;

	rec = ctx->db->fetch_locked(ctx->db, talloc_tos(),
				    string_term_tdb_data(name));
	if (rec == NULL) {
		DEBUG(10, ("fetch_locked(\"%s\") failed\n", name));
		status = NT_STATUS_INTERNAL_ERROR;
		goto done;
	}

	if (!g_lock_parse(talloc_tos(), rec->value, &num_locks, &locks)) {
		DEBUG(10, ("g_lock_parse for %s failed\n", name));
		status = NT_STATUS_INTERNAL_ERROR;
		goto done;
	}

	for (i=0; i<num_locks; i++) {
		if (procid_equal(&pid, &locks[i].pid)) {
			break;
		}
	}

	if (i == num_locks) {
		DEBUG(10, ("g_lock_force_unlock: Lock not found\n"));
		status = NT_STATUS_INTERNAL_ERROR;
		goto done;
	}

	lock_type = locks[i].lock_type;

	if (i < (num_locks-1)) {
		locks[i] = locks[num_locks-1];
	}
	num_locks -= 1;

	if (num_locks == 0) {
		status = rec->delete_rec(rec);
	} else {
		TDB_DATA data;
		data = make_tdb_data((uint8_t *)locks,
				     sizeof(struct g_lock_rec) * num_locks);
		status = rec->store(rec, data, 0);
	}

	if (!NT_STATUS_IS_OK(status)) {
		DEBUG(1, ("g_lock_force_unlock: Could not store record: %s\n",
			  nt_errstr(status)));
		goto done;
	}

	if ((lock_type & G_LOCK_PENDING) == 0) {
		/*
		 * We've been the lock holder. Tell all others to retry.
		 */
		for (i=0; i<num_locks; i++) {
			if ((locks[i].lock_type & G_LOCK_PENDING) == 0) {
				continue;
			}

			/*
			 * Ping all waiters to retry
			 */
			status = messaging_send(ctx->msg, locks[i].pid,
						MSG_DBWRAP_G_LOCK_RETRY,
						&data_blob_null);
			if (!NT_STATUS_IS_OK(status)) {
				DEBUG(1, ("sending retry to %s failed: %s\n",
					  procid_str(talloc_tos(),
						     &locks[i].pid),
					  nt_errstr(status)));
			}
		}
	}

done:

	TALLOC_FREE(locks);
	TALLOC_FREE(rec);
	return status;
}

NTSTATUS g_lock_unlock(struct g_lock_ctx *ctx, const char *name)
{
	NTSTATUS status;

	status = g_lock_force_unlock(ctx, name, procid_self());

#ifdef CLUSTER_SUPPORT
	if (lp_clustering()) {
		ctdb_unwatch(messaging_ctdbd_connection());
	}
#endif
	return status;
}

struct g_lock_locks_state {
	int (*fn)(const char *name, void *private_data);
	void *private_data;
};

static int g_lock_locks_fn(struct db_record *rec, void *priv)
{
	struct g_lock_locks_state *state = (struct g_lock_locks_state *)priv;

	if ((rec->key.dsize == 0) || (rec->key.dptr[rec->key.dsize-1] != 0)) {
		DEBUG(1, ("invalid key in g_lock.tdb, ignoring\n"));
		return 0;
	}
	return state->fn((char *)rec->key.dptr, state->private_data);
}

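/*
 * Enumerate the names of all locks in g_lock.tdb, calling "fn" for
 * each of them.
 */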
int g_lock_locks(struct g_lock_ctx *ctx,
		 int (*fn)(const char *name, void *private_data),
		 void *private_data)
{
	struct g_lock_locks_state state;

	state.fn = fn;
	state.private_data = private_data;

	return ctx->db->traverse_read(ctx->db, g_lock_locks_fn, &state);
}

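/*
 * Walk the holders and waiters recorded under "name", calling "fn"
 * for each of them until it returns non-zero.
 */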
NTSTATUS g_lock_dump(struct g_lock_ctx *ctx, const char *name,
		     int (*fn)(struct server_id pid,
			       enum g_lock_type lock_type,
			       void *private_data),
		     void *private_data)
{
	TDB_DATA data;
	int i, num_locks;
	struct g_lock_rec *locks = NULL;
	bool ret;

	if (ctx->db->fetch(ctx->db, talloc_tos(), string_term_tdb_data(name),
			   &data) != 0) {
		return NT_STATUS_NOT_FOUND;
	}

	if ((data.dsize == 0) || (data.dptr == NULL)) {
		return NT_STATUS_OK;
	}

	ret = g_lock_parse(talloc_tos(), data, &num_locks, &locks);

	TALLOC_FREE(data.dptr);

	if (!ret) {
		DEBUG(10, ("g_lock_parse for %s failed\n", name));
		return NT_STATUS_INTERNAL_ERROR;
	}

	for (i=0; i<num_locks; i++) {
		if (fn(locks[i].pid, locks[i].lock_type, private_data) != 0) {
			break;
		}
	}
	TALLOC_FREE(locks);
	return NT_STATUS_OK;
}

struct g_lock_get_state {
	bool found;
	struct server_id *pid;
};

static int g_lock_get_fn(struct server_id pid, enum g_lock_type lock_type,
			 void *priv)
{
	struct g_lock_get_state *state = (struct g_lock_get_state *)priv;

	if ((lock_type & G_LOCK_PENDING) != 0) {
		return 0;
	}

	state->found = true;
	*state->pid = pid;
	return 1;
}

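/*
 * Fetch the server_id of the current (non-pending) holder of "name".
 */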
NTSTATUS g_lock_get(struct g_lock_ctx *ctx, const char *name,
		    struct server_id *pid)
{
	struct g_lock_get_state state;
	NTSTATUS status;

	state.found = false;
	state.pid = pid;

	status = g_lock_dump(ctx, name, g_lock_get_fn, &state);
	if (!NT_STATUS_IS_OK(status)) {
		return status;
	}
	if (!state.found) {
		return NT_STATUS_NOT_FOUND;
	}
	return NT_STATUS_OK;
}
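
For orientation, a minimal caller sketch follows. It is not part of the file above: the function name and lock name are made up, and it assumes the G_LOCK_WRITE constant from g_lock.h, the timeval_set() helper from the Samba utility library, and a messaging_context already set up by the caller.

/* Hypothetical caller, for illustration only. */
static NTSTATUS example_serialized_work(TALLOC_CTX *mem_ctx,
					struct messaging_context *msg)
{
	struct g_lock_ctx *ctx;
	NTSTATUS status;

	ctx = g_lock_ctx_init(mem_ctx, msg);
	if (ctx == NULL) {
		return NT_STATUS_NO_MEMORY;
	}

	/* Block for up to 10 seconds waiting for the global write lock. */
	status = g_lock_lock(ctx, "example_lock", G_LOCK_WRITE,
			     timeval_set(10, 0));
	if (!NT_STATUS_IS_OK(status)) {
		TALLOC_FREE(ctx);
		return status;
	}

	/* ... work that must be serialized across processes ... */

	status = g_lock_unlock(ctx, "example_lock");
	TALLOC_FREE(ctx);
	return status;
}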