fs/nfs/nfs4state.c

   1 /*
   2  *  fs/nfs/nfs4state.c
   3  *
   4  *  Client-side XDR for NFSv4.
   5  *
   6  *  Copyright (c) 2002 The Regents of the University of Michigan.
   7  *  All rights reserved.
   8  *
   9  *  Kendrick Smith <kmsmith@umich.edu>
  10  *
  11  *  Redistribution and use in source and binary forms, with or without
  12  *  modification, are permitted provided that the following conditions
  13  *  are met:
  14  *
  15  *  1. Redistributions of source code must retain the above copyright
  16  *     notice, this list of conditions and the following disclaimer.
  17  *  2. Redistributions in binary form must reproduce the above copyright
  18  *     notice, this list of conditions and the following disclaimer in the
  19  *     documentation and/or other materials provided with the distribution.
  20  *  3. Neither the name of the University nor the names of its
  21  *     contributors may be used to endorse or promote products derived
  22  *     from this software without specific prior written permission.
  23  *
  24  *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
  25  *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
  26  *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  27  *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  28  *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  29  *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  30  *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
  31  *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
  32  *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
  33  *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  34  *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  35  *
  36  * Implementation of the NFSv4 state model.  For the time being,
  37  * this is minimal, but will be made much more complex in a
  38  * subsequent patch.
  39  */
  40
  41 #include <linux/kernel.h>
  42 #include <linux/slab.h>
  43 #include <linux/smp_lock.h>
  44 #include <linux/nfs_fs.h>
  45 #include <linux/nfs_idmap.h>
  46 #include <linux/kthread.h>
  47 #include <linux/module.h>
  48 #include <linux/random.h>
  49 #include <linux/workqueue.h>
  50 #include <linux/bitops.h>
  51
  52 #include "nfs4_fs.h"
  53 #include "callback.h"
  54 #include "delegation.h"
  55 #include "internal.h"
  56
  57 #define OPENOWNER_POOL_SIZE     8
  58
  59 const nfs4_stateid zero_stateid;
  60
  61 static LIST_HEAD(nfs4_clientid_list);
  62
  63 static int nfs4_init_client(struct nfs_client *clp, struct rpc_cred *cred)
  64 {
  65         int status = nfs4_proc_setclientid(clp, NFS4_CALLBACK,
  66                         nfs_callback_tcpport, cred);
  67         if (status == 0)
  68                 status = nfs4_proc_setclientid_confirm(clp, cred);
  69         if (status == 0)
  70                 nfs4_schedule_state_renewal(clp);
  71         return status;
  72 }
  73
  74 struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp)
  75 {
  76         struct nfs4_state_owner *sp;
  77         struct rb_node *pos;
  78         struct rpc_cred *cred = NULL;
  79
  80         for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) {
  81                 sp = rb_entry(pos, struct nfs4_state_owner, so_client_node);
  82                 if (list_empty(&sp->so_states))
  83                         continue;
  84                 cred = get_rpccred(sp->so_cred);
  85                 break;
  86         }
  87         return cred;
  88 }
  89
  90 static struct rpc_cred *nfs4_get_setclientid_cred(struct nfs_client *clp)
  91 {
  92         struct nfs4_state_owner *sp;
  93         struct rb_node *pos;
  94
  95         pos = rb_first(&clp->cl_state_owners);
  96         if (pos != NULL) {
  97                 sp = rb_entry(pos, struct nfs4_state_owner, so_client_node);
  98                 return get_rpccred(sp->so_cred);
  99         }
 100         return NULL;
 101 }
 102
 103 static void nfs_alloc_unique_id(struct rb_root *root, struct nfs_unique_id *new,
 104                 __u64 minval, int maxbits)
 105 {
 106         struct rb_node **p, *parent;
 107         struct nfs_unique_id *pos;
 108         __u64 mask = ~0ULL;
 109
 110         if (maxbits < 64)
 111                 mask = (1ULL << maxbits) - 1ULL;
 112
 113         /* Ensure distribution is more or less flat */
 114         get_random_bytes(&new->id, sizeof(new->id));
 115         new->id &= mask;
 116         if (new->id < minval)
 117                 new->id += minval;
 118 retry:
 119         p = &root->rb_node;
 120         parent = NULL;
 121
 122         while (*p != NULL) {
 123                 parent = *p;
 124                 pos = rb_entry(parent, struct nfs_unique_id, rb_node);
 125
 126                 if (new->id < pos->id)
 127                         p = &(*p)->rb_left;
 128                 else if (new->id > pos->id)
 129                         p = &(*p)->rb_right;
 130                 else
 131                         goto id_exists;
 132         }
 133         rb_link_node(&new->rb_node, parent, p);
 134         rb_insert_color(&new->rb_node, root);
 135         return;
 136 id_exists:
 137         for (;;) {
 138                 new->id++;
 139                 if (new->id < minval || (new->id & mask) != new->id) {
 140                         new->id = minval;
 141                         break;
 142                 }
 143                 parent = rb_next(parent);
 144                 if (parent == NULL)
 145                         break;
 146                 pos = rb_entry(parent, struct nfs_unique_id, rb_node);
 147                 if (new->id < pos->id)
 148                         break;
 149         }
 150         goto retry;
 151 }
 152
 153 static void nfs_free_unique_id(struct rb_root *root, struct nfs_unique_id *id)
 154 {
 155         rb_erase(&id->rb_node, root);
 156 }
 157
 158 static struct nfs4_state_owner *
 159 nfs4_find_state_owner(struct nfs_server *server, struct rpc_cred *cred)
 160 {
 161         struct nfs_client *clp = server->nfs_client;
 162         struct rb_node **p = &clp->cl_state_owners.rb_node,
 163                        *parent = NULL;
 164         struct nfs4_state_owner *sp, *res = NULL;
 165
 166         while (*p != NULL) {
 167                 parent = *p;
 168                 sp = rb_entry(parent, struct nfs4_state_owner, so_client_node);
 169
 170                 if (server < sp->so_server) {
 171                         p = &parent->rb_left;
 172                         continue;
 173                 }
 174                 if (server > sp->so_server) {
 175                         p = &parent->rb_right;
 176                         continue;
 177                 }
 178                 if (cred < sp->so_cred)
 179                         p = &parent->rb_left;
 180                 else if (cred > sp->so_cred)
 181                         p = &parent->rb_right;
 182                 else {
 183                         atomic_inc(&sp->so_count);
 184                         res = sp;
 185                         break;
 186                 }
 187         }
 188         return res;
 189 }
 190
 191 static struct nfs4_state_owner *
 192 nfs4_insert_state_owner(struct nfs_client *clp, struct nfs4_state_owner *new)
 193 {
 194         struct rb_node **p = &clp->cl_state_owners.rb_node,
 195                        *parent = NULL;
 196         struct nfs4_state_owner *sp;
 197
 198         while (*p != NULL) {
 199                 parent = *p;
 200                 sp = rb_entry(parent, struct nfs4_state_owner, so_client_node);
 201
 202                 if (new->so_server < sp->so_server) {
 203                         p = &parent->rb_left;
 204                         continue;
 205                 }
 206                 if (new->so_server > sp->so_server) {
 207                         p = &parent->rb_right;
 208                         continue;
 209                 }
 210                 if (new->so_cred < sp->so_cred)
 211                         p = &parent->rb_left;
 212                 else if (new->so_cred > sp->so_cred)
 213                         p = &parent->rb_right;
 214                 else {
 215                         atomic_inc(&sp->so_count);
 216                         return sp;
 217                 }
 218         }
 219         nfs_alloc_unique_id(&clp->cl_openowner_id, &new->so_owner_id, 1, 64);
 220         rb_link_node(&new->so_client_node, parent, p);
 221         rb_insert_color(&new->so_client_node, &clp->cl_state_owners);
 222         return new;
 223 }
 224
 225 static void
 226 nfs4_remove_state_owner(struct nfs_client *clp, struct nfs4_state_owner *sp)
 227 {
 228         if (!RB_EMPTY_NODE(&sp->so_client_node))
 229                 rb_erase(&sp->so_client_node, &clp->cl_state_owners);
 230         nfs_free_unique_id(&clp->cl_openowner_id, &sp->so_owner_id);
 231 }
 232
 233 /*
 234  * nfs4_alloc_state_owner(): this is called on the OPEN or CREATE path to
 235  * create a new state_owner.
 236  *
 237  */
 238 static struct nfs4_state_owner *
 239 nfs4_alloc_state_owner(void)
 240 {
 241         struct nfs4_state_owner *sp;
 242
 243         sp = kzalloc(sizeof(*sp),GFP_KERNEL);
 244         if (!sp)
 245                 return NULL;
 246         spin_lock_init(&sp->so_lock);
 247         INIT_LIST_HEAD(&sp->so_states);
 248         INIT_LIST_HEAD(&sp->so_delegations);
 249         rpc_init_wait_queue(&sp->so_sequence.wait, "Seqid_waitqueue");
 250         sp->so_seqid.sequence = &sp->so_sequence;
 251         spin_lock_init(&sp->so_sequence.lock);
 252         INIT_LIST_HEAD(&sp->so_sequence.list);
 253         atomic_set(&sp->so_count, 1);
 254         return sp;
 255 }
 256
 257 void
 258 nfs4_drop_state_owner(struct nfs4_state_owner *sp)
 259 {
 260         if (!RB_EMPTY_NODE(&sp->so_client_node)) {
 261                 struct nfs_client *clp = sp->so_client;
 262
 263                 spin_lock(&clp->cl_lock);
 264                 rb_erase(&sp->so_client_node, &clp->cl_state_owners);
 265                 RB_CLEAR_NODE(&sp->so_client_node);
 266                 spin_unlock(&clp->cl_lock);
 267         }
 268 }
 269
 270 /*
 271  * Note: must be called with clp->cl_sem held in order to prevent races
 272  *       with reboot recovery!
 273  */
 274 struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, struct rpc_cred *cred)
 275 {
 276         struct nfs_client *clp = server->nfs_client;
 277         struct nfs4_state_owner *sp, *new;
 278
 279         spin_lock(&clp->cl_lock);
 280         sp = nfs4_find_state_owner(server, cred);
 281         spin_unlock(&clp->cl_lock);
 282         if (sp != NULL)
 283                 return sp;
 284         new = nfs4_alloc_state_owner();
 285         if (new == NULL)
 286                 return NULL;
 287         new->so_client = clp;
 288         new->so_server = server;
 289         new->so_cred = cred;
 290         spin_lock(&clp->cl_lock);
 291         sp = nfs4_insert_state_owner(clp, new);
 292         spin_unlock(&clp->cl_lock);
 293         if (sp == new)
 294                 get_rpccred(cred);
 295         else
 296                 kfree(new);
 297         return sp;
 298 }
 299
 300 /*
 301  * Must be called with clp->cl_sem held in order to avoid races
 302  * with state recovery...
 303  */
 304 void nfs4_put_state_owner(struct nfs4_state_owner *sp)
 305 {
 306         struct nfs_client *clp = sp->so_client;
 307         struct rpc_cred *cred = sp->so_cred;
 308
 309         if (!atomic_dec_and_lock(&sp->so_count, &clp->cl_lock))
 310                 return;
 311         nfs4_remove_state_owner(clp, sp);
 312         spin_unlock(&clp->cl_lock);
 313         put_rpccred(cred);
 314         kfree(sp);
 315 }
 316
 317 static struct nfs4_state *
 318 nfs4_alloc_open_state(void)
 319 {
 320         struct nfs4_state *state;
 321
 322         state = kzalloc(sizeof(*state), GFP_KERNEL);
 323         if (!state)
 324                 return NULL;
 325         atomic_set(&state->count, 1);
 326         INIT_LIST_HEAD(&state->lock_states);
 327         spin_lock_init(&state->state_lock);
 328         seqlock_init(&state->seqlock);
 329         return state;
 330 }
 331
 332 void
 333 nfs4_state_set_mode_locked(struct nfs4_state *state, mode_t mode)
 334 {
 335         if (state->state == mode)
 336                 return;
 337         /* NB! List reordering - see the reclaim code for why.  */
 338         if ((mode & FMODE_WRITE) != (state->state & FMODE_WRITE)) {
 339                 if (mode & FMODE_WRITE)
 340                         list_move(&state->open_states, &state->owner->so_states);
 341                 else
 342                         list_move_tail(&state->open_states, &state->owner->so_states);
 343         }
 344         state->state = mode;
 345 }
 346
 347 static struct nfs4_state *
 348 __nfs4_find_state_byowner(struct inode *inode, struct nfs4_state_owner *owner)
 349 {
 350         struct nfs_inode *nfsi = NFS_I(inode);
 351         struct nfs4_state *state;
 352
 353         list_for_each_entry(state, &nfsi->open_states, inode_states) {
 354                 if (state->owner != owner)
 355                         continue;
 356                 if (atomic_inc_not_zero(&state->count))
 357                         return state;
 358         }
 359         return NULL;
 360 }
 361
 362 static void
 363 nfs4_free_open_state(struct nfs4_state *state)
 364 {
 365         kfree(state);
 366 }
 367
 368 struct nfs4_state *
 369 nfs4_get_open_state(struct inode *inode, struct nfs4_state_owner *owner)
 370 {
 371         struct nfs4_state *state, *new;
 372         struct nfs_inode *nfsi = NFS_I(inode);
 373
 374         spin_lock(&inode->i_lock);
 375         state = __nfs4_find_state_byowner(inode, owner);
 376         spin_unlock(&inode->i_lock);
 377         if (state)
 378                 goto out;
 379         new = nfs4_alloc_open_state();
 380         spin_lock(&owner->so_lock);
 381         spin_lock(&inode->i_lock);
 382         state = __nfs4_find_state_byowner(inode, owner);
 383         if (state == NULL && new != NULL) {
 384                 state = new;
 385                 state->owner = owner;
 386                 atomic_inc(&owner->so_count);
 387                 list_add(&state->inode_states, &nfsi->open_states);
 388                 state->inode = igrab(inode);
 389                 spin_unlock(&inode->i_lock);
 390                 /* Note: The reclaim code dictates that we add stateless
 391                  * and read-only stateids to the end of the list */
 392                 list_add_tail(&state->open_states, &owner->so_states);
 393                 spin_unlock(&owner->so_lock);
 394         } else {
 395                 spin_unlock(&inode->i_lock);
 396                 spin_unlock(&owner->so_lock);
 397                 if (new)
 398                         nfs4_free_open_state(new);
 399         }
 400 out:
 401         return state;
 402 }
 403
 404 /*
 405  * Beware! Caller must be holding exactly one
 406  * reference to clp->cl_sem!
 407  */
 408 void nfs4_put_open_state(struct nfs4_state *state)
 409 {
 410         struct inode *inode = state->inode;
 411         struct nfs4_state_owner *owner = state->owner;
 412
 413         if (!atomic_dec_and_lock(&state->count, &owner->so_lock))
 414                 return;
 415         spin_lock(&inode->i_lock);
 416         list_del(&state->inode_states);
 417         list_del(&state->open_states);
 418         spin_unlock(&inode->i_lock);
 419         spin_unlock(&owner->so_lock);
 420         iput(inode);
 421         nfs4_free_open_state(state);
 422         nfs4_put_state_owner(owner);
 423 }
 424
 425 /*
 426  * Close the current file.
 427  */
 428 void nfs4_close_state(struct path *path, struct nfs4_state *state, mode_t mode)
 429 {
 430         struct nfs4_state_owner *owner = state->owner;
 431         int call_close = 0;
 432         int newstate;
 433
 434         atomic_inc(&owner->so_count);
 435         /* Protect against nfs4_find_state() */
 436         spin_lock(&owner->so_lock);
 437         switch (mode & (FMODE_READ | FMODE_WRITE)) {
 438                 case FMODE_READ:
 439                         state->n_rdonly--;
 440                         break;
 441                 case FMODE_WRITE:
 442                         state->n_wronly--;
 443                         break;
 444                 case FMODE_READ|FMODE_WRITE:
 445                         state->n_rdwr--;
 446         }
 447         newstate = FMODE_READ|FMODE_WRITE;
 448         if (state->n_rdwr == 0) {
 449                 if (state->n_rdonly == 0) {
 450                         newstate &= ~FMODE_READ;
 451                         call_close |= test_bit(NFS_O_RDONLY_STATE, &state->flags);
 452                         call_close |= test_bit(NFS_O_RDWR_STATE, &state->flags);
 453                 }
 454                 if (state->n_wronly == 0) {
 455                         newstate &= ~FMODE_WRITE;
 456                         call_close |= test_bit(NFS_O_WRONLY_STATE, &state->flags);
 457                         call_close |= test_bit(NFS_O_RDWR_STATE, &state->flags);
 458                 }
 459                 if (newstate == 0)
 460                         clear_bit(NFS_DELEGATED_STATE, &state->flags);
 461         }
 462         nfs4_state_set_mode_locked(state, newstate);
 463         spin_unlock(&owner->so_lock);
 464
 465         if (!call_close) {
 466                 nfs4_put_open_state(state);
 467                 nfs4_put_state_owner(owner);
 468         } else
 469                 nfs4_do_close(path, state);
 470 }
 471
 472 /*
 473  * Search the state->lock_states for an existing lock_owner
 474  * that is compatible with current->files
 475  */
 476 static struct nfs4_lock_state *
 477 __nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner)
 478 {
 479         struct nfs4_lock_state *pos;
 480         list_for_each_entry(pos, &state->lock_states, ls_locks) {
 481                 if (pos->ls_owner != fl_owner)
 482                         continue;
 483                 atomic_inc(&pos->ls_count);
 484                 return pos;
 485         }
 486         return NULL;
 487 }
 488
 489 /*
 490  * Return a compatible lock_state. If no initialized lock_state structure
 491  * exists, return an uninitialized one.
 492  *
 493  */
 494 static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t fl_owner)
 495 {
 496         struct nfs4_lock_state *lsp;
 497         struct nfs_client *clp = state->owner->so_client;
 498
 499         lsp = kzalloc(sizeof(*lsp), GFP_KERNEL);
 500         if (lsp == NULL)
 501                 return NULL;
 502         lsp->ls_seqid.sequence = &state->owner->so_sequence;
 503         atomic_set(&lsp->ls_count, 1);
 504         lsp->ls_owner = fl_owner;
 505         spin_lock(&clp->cl_lock);
 506         nfs_alloc_unique_id(&clp->cl_lockowner_id, &lsp->ls_id, 1, 64);
 507         spin_unlock(&clp->cl_lock);
 508         INIT_LIST_HEAD(&lsp->ls_locks);
 509         return lsp;
 510 }
 511
 512 static void nfs4_free_lock_state(struct nfs4_lock_state *lsp)
 513 {
 514         struct nfs_client *clp = lsp->ls_state->owner->so_client;
 515
 516         spin_lock(&clp->cl_lock);
 517         nfs_free_unique_id(&clp->cl_lockowner_id, &lsp->ls_id);
 518         spin_unlock(&clp->cl_lock);
 519         kfree(lsp);
 520 }
 521
 522 /*
 523  * Return a compatible lock_state. If no initialized lock_state structure
 524  * exists, return an uninitialized one.
 525  *
 526  * The caller must be holding clp->cl_sem
 527  */
 528 static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_owner_t owner)
 529 {
 530         struct nfs4_lock_state *lsp, *new = NULL;
 531
 532         for(;;) {
 533                 spin_lock(&state->state_lock);
 534                 lsp = __nfs4_find_lock_state(state, owner);
 535                 if (lsp != NULL)
 536                         break;
 537                 if (new != NULL) {
 538                         new->ls_state = state;
 539                         list_add(&new->ls_locks, &state->lock_states);
 540                         set_bit(LK_STATE_IN_USE, &state->flags);
 541                         lsp = new;
 542                         new = NULL;
 543                         break;
 544                 }
 545                 spin_unlock(&state->state_lock);
 546                 new = nfs4_alloc_lock_state(state, owner);
 547                 if (new == NULL)
 548                         return NULL;
 549         }
 550         spin_unlock(&state->state_lock);
 551         if (new != NULL)
 552                 nfs4_free_lock_state(new);
 553         return lsp;
 554 }
 555
 556 /*
 557  * Release reference to lock_state, and free it if we see that
 558  * it is no longer in use
 559  */
 560 void nfs4_put_lock_state(struct nfs4_lock_state *lsp)
 561 {
 562         struct nfs4_state *state;
 563
 564         if (lsp == NULL)
 565                 return;
 566         state = lsp->ls_state;
 567         if (!atomic_dec_and_lock(&lsp->ls_count, &state->state_lock))
 568                 return;
 569         list_del(&lsp->ls_locks);
 570         if (list_empty(&state->lock_states))
 571                 clear_bit(LK_STATE_IN_USE, &state->flags);
 572         spin_unlock(&state->state_lock);
 573         nfs4_free_lock_state(lsp);
 574 }
 575
 576 static void nfs4_fl_copy_lock(struct file_lock *dst, struct file_lock *src)
 577 {
 578         struct nfs4_lock_state *lsp = src->fl_u.nfs4_fl.owner;
 579
 580         dst->fl_u.nfs4_fl.owner = lsp;
 581         atomic_inc(&lsp->ls_count);
 582 }
 583
 584 static void nfs4_fl_release_lock(struct file_lock *fl)
 585 {
 586         nfs4_put_lock_state(fl->fl_u.nfs4_fl.owner);
 587 }
 588
 589 static struct file_lock_operations nfs4_fl_lock_ops = {
 590         .fl_copy_lock = nfs4_fl_copy_lock,
 591         .fl_release_private = nfs4_fl_release_lock,
 592 };
 593
 594 int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl)
 595 {
 596         struct nfs4_lock_state *lsp;
 597
 598         if (fl->fl_ops != NULL)
 599                 return 0;
 600         lsp = nfs4_get_lock_state(state, fl->fl_owner);
 601         if (lsp == NULL)
 602                 return -ENOMEM;
 603         fl->fl_u.nfs4_fl.owner = lsp;
 604         fl->fl_ops = &nfs4_fl_lock_ops;
 605         return 0;
 606 }
 607
 608 /*
 609  * Byte-range lock aware utility to initialize the stateid of read/write
 610  * requests.
 611  */
 612 void nfs4_copy_stateid(nfs4_stateid *dst, struct nfs4_state *state, fl_owner_t fl_owner)
 613 {
 614         struct nfs4_lock_state *lsp;
 615         int seq;
 616
 617         do {
 618                 seq = read_seqbegin(&state->seqlock);
 619                 memcpy(dst, &state->stateid, sizeof(*dst));
 620         } while (read_seqretry(&state->seqlock, seq));
 621         if (test_bit(LK_STATE_IN_USE, &state->flags) == 0)
 622                 return;
 623
 624         spin_lock(&state->state_lock);
 625         lsp = __nfs4_find_lock_state(state, fl_owner);
 626         if (lsp != NULL && (lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0)
 627                 memcpy(dst, &lsp->ls_stateid, sizeof(*dst));
 628         spin_unlock(&state->state_lock);
 629         nfs4_put_lock_state(lsp);
 630 }
 631
 632 struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter)
 633 {
 634         struct rpc_sequence *sequence = counter->sequence;
 635         struct nfs_seqid *new;
 636
 637         new = kmalloc(sizeof(*new), GFP_KERNEL);
 638         if (new != NULL) {
 639                 new->sequence = counter;
 640                 spin_lock(&sequence->lock);
 641                 list_add_tail(&new->list, &sequence->list);
 642                 spin_unlock(&sequence->lock);
 643         }
 644         return new;
 645 }
 646
 647 void nfs_free_seqid(struct nfs_seqid *seqid)
 648 {
 649         struct rpc_sequence *sequence = seqid->sequence->sequence;
 650
 651         spin_lock(&sequence->lock);
 652         list_del(&seqid->list);
 653         spin_unlock(&sequence->lock);
 654         rpc_wake_up(&sequence->wait);
 655         kfree(seqid);
 656 }
 657
 658 /*
 659  * Increment the seqid if the OPEN/OPEN_DOWNGRADE/CLOSE succeeded, or
 660  * failed with a seqid incrementing error -
 661  * see comments nfs_fs.h:seqid_mutating_error()
 662  */
 663 static void nfs_increment_seqid(int status, struct nfs_seqid *seqid)
 664 {
 665         switch (status) {
 666                 case 0:
 667                         break;
 668                 case -NFS4ERR_BAD_SEQID:
 669                         if (seqid->sequence->flags & NFS_SEQID_CONFIRMED)
 670                                 return;
 671                         printk(KERN_WARNING "NFS: v4 server returned a bad"
 672                                         "sequence-id error on an"
 673                                         "unconfirmed sequence %p!\n",
 674                                         seqid->sequence);
 675                 case -NFS4ERR_STALE_CLIENTID:
 676                 case -NFS4ERR_STALE_STATEID:
 677                 case -NFS4ERR_BAD_STATEID:
 678                 case -NFS4ERR_BADXDR:
 679                 case -NFS4ERR_RESOURCE:
 680                 case -NFS4ERR_NOFILEHANDLE:
 681                         /* Non-seqid mutating errors */
 682                         return;
 683         };
 684         /*
 685          * Note: no locking needed as we are guaranteed to be first
 686          * on the sequence list
 687          */
 688         seqid->sequence->counter++;
 689 }
 690
 691 void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid)
 692 {
 693         if (status == -NFS4ERR_BAD_SEQID) {
 694                 struct nfs4_state_owner *sp = container_of(seqid->sequence,
 695                                 struct nfs4_state_owner, so_seqid);
 696                 nfs4_drop_state_owner(sp);
 697         }
 698         nfs_increment_seqid(status, seqid);
 699 }
 700
 701 /*
 702  * Increment the seqid if the LOCK/LOCKU succeeded, or
 703  * failed with a seqid incrementing error -
 704  * see comments nfs_fs.h:seqid_mutating_error()
 705  */
 706 void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid)
 707 {
 708         nfs_increment_seqid(status, seqid);
 709 }
 710
 711 int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task)
 712 {
 713         struct rpc_sequence *sequence = seqid->sequence->sequence;
 714         int status = 0;
 715
 716         if (sequence->list.next == &seqid->list)
 717                 goto out;
 718         spin_lock(&sequence->lock);
 719         if (sequence->list.next != &seqid->list) {
 720                 rpc_sleep_on(&sequence->wait, task, NULL, NULL);
 721                 status = -EAGAIN;
 722         }
 723         spin_unlock(&sequence->lock);
 724 out:
 725         return status;
 726 }
 727
 728 static int reclaimer(void *);
 729
 730 static inline void nfs4_clear_recover_bit(struct nfs_client *clp)
 731 {
 732         smp_mb__before_clear_bit();
 733         clear_bit(NFS4CLNT_STATE_RECOVER, &clp->cl_state);
 734         smp_mb__after_clear_bit();
 735         wake_up_bit(&clp->cl_state, NFS4CLNT_STATE_RECOVER);
 736         rpc_wake_up(&clp->cl_rpcwaitq);
 737 }
 738
 739 /*
 740  * State recovery routine
 741  */
 742 static void nfs4_recover_state(struct nfs_client *clp)
 743 {
 744         struct task_struct *task;
 745
 746         __module_get(THIS_MODULE);
 747         atomic_inc(&clp->cl_count);
 748         task = kthread_run(reclaimer, clp, "%u.%u.%u.%u-reclaim",
 749                         NIPQUAD(clp->cl_addr.sin_addr));
 750         if (!IS_ERR(task))
 751                 return;
 752         nfs4_clear_recover_bit(clp);
 753         nfs_put_client(clp);
 754         module_put(THIS_MODULE);
 755 }
 756
 757 /*
 758  * Schedule a state recovery attempt
 759  */
 760 void nfs4_schedule_state_recovery(struct nfs_client *clp)
 761 {
 762         if (!clp)
 763                 return;
 764         if (test_and_set_bit(NFS4CLNT_STATE_RECOVER, &clp->cl_state) == 0)
 765                 nfs4_recover_state(clp);
 766 }
 767
 768 static int nfs4_reclaim_locks(struct nfs4_state_recovery_ops *ops, struct nfs4_state *state)
 769 {
 770         struct inode *inode = state->inode;
 771         struct file_lock *fl;
 772         int status = 0;
 773
 774         for (fl = inode->i_flock; fl != 0; fl = fl->fl_next) {
 775                 if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK)))
 776                         continue;
 777                 if (((struct nfs_open_context *)fl->fl_file->private_data)->state != state)
 778                         continue;
 779                 status = ops->recover_lock(state, fl);
 780                 if (status >= 0)
 781                         continue;
 782                 switch (status) {
 783                         default:
 784                                 printk(KERN_ERR "%s: unhandled error %d. Zeroing state\n",
 785                                                 __FUNCTION__, status);
 786                         case -NFS4ERR_EXPIRED:
 787                         case -NFS4ERR_NO_GRACE:
 788                         case -NFS4ERR_RECLAIM_BAD:
 789                         case -NFS4ERR_RECLAIM_CONFLICT:
 790                                 /* kill_proc(fl->fl_pid, SIGLOST, 1); */
 791                                 break;
 792                         case -NFS4ERR_STALE_CLIENTID:
 793                                 goto out_err;
 794                 }
 795         }
 796         return 0;
 797 out_err:
 798         return status;
 799 }
 800
 801 static int nfs4_reclaim_open_state(struct nfs4_state_recovery_ops *ops, struct nfs4_state_owner *sp)
 802 {
 803         struct nfs4_state *state;
 804         struct nfs4_lock_state *lock;
 805         int status = 0;
 806
 807         /* Note: we rely on the sp->so_states list being ordered
 808          * so that we always reclaim open(O_RDWR) and/or open(O_WRITE)
 809          * states first.
 810          * This is needed to ensure that the server won't give us any
 811          * read delegations that we have to return if, say, we are
 812          * recovering after a network partition or a reboot from a
 813          * server that doesn't support a grace period.
 814          */
 815         list_for_each_entry(state, &sp->so_states, open_states) {
 816                 if (state->state == 0)
 817                         continue;
 818                 status = ops->recover_open(sp, state);
 819                 if (status >= 0) {
 820                         status = nfs4_reclaim_locks(ops, state);
 821                         if (status < 0)
 822                                 goto out_err;
 823                         list_for_each_entry(lock, &state->lock_states, ls_locks) {
 824                                 if (!(lock->ls_flags & NFS_LOCK_INITIALIZED))
 825                                         printk("%s: Lock reclaim failed!\n",
 826                                                         __FUNCTION__);
 827                         }
 828                         continue;
 829                 }
 830                 switch (status) {
 831                         default:
 832                                 printk(KERN_ERR "%s: unhandled error %d. Zeroing state\n",
 833                                                 __FUNCTION__, status);
 834                         case -ENOENT:
 835                         case -NFS4ERR_RECLAIM_BAD:
 836                         case -NFS4ERR_RECLAIM_CONFLICT:
 837                                 /*
 838                                  * Open state on this file cannot be recovered
 839                                  * All we can do is revert to using the zero stateid.
 840                                  */
 841                                 memset(state->stateid.data, 0,
 842                                         sizeof(state->stateid.data));
 843                                 /* Mark the file as being 'closed' */
 844                                 state->state = 0;
 845                                 break;
 846                         case -NFS4ERR_EXPIRED:
 847                         case -NFS4ERR_NO_GRACE:
 848                         case -NFS4ERR_STALE_CLIENTID:
 849                                 goto out_err;
 850                 }
 851         }
 852         return 0;
 853 out_err:
 854         return status;
 855 }
 856
 857 static void nfs4_state_mark_reclaim(struct nfs_client *clp)
 858 {
 859         struct nfs4_state_owner *sp;
 860         struct rb_node *pos;
 861         struct nfs4_state *state;
 862         struct nfs4_lock_state *lock;
 863
 864         /* Reset all sequence ids to zero */
 865         for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) {
 866                 sp = rb_entry(pos, struct nfs4_state_owner, so_client_node);
 867                 sp->so_seqid.counter = 0;
 868                 sp->so_seqid.flags = 0;
 869                 spin_lock(&sp->so_lock);
 870                 list_for_each_entry(state, &sp->so_states, open_states) {
 871                         clear_bit(NFS_DELEGATED_STATE, &state->flags);
 872                         clear_bit(NFS_O_RDONLY_STATE, &state->flags);
 873                         clear_bit(NFS_O_WRONLY_STATE, &state->flags);
 874                         clear_bit(NFS_O_RDWR_STATE, &state->flags);
 875                         list_for_each_entry(lock, &state->lock_states, ls_locks) {
 876                                 lock->ls_seqid.counter = 0;
 877                                 lock->ls_seqid.flags = 0;
 878                                 lock->ls_flags &= ~NFS_LOCK_INITIALIZED;
 879                         }
 880                 }
 881                 spin_unlock(&sp->so_lock);
 882         }
 883 }
 884
 885 static int reclaimer(void *ptr)
 886 {
 887         struct nfs_client *clp = ptr;
 888         struct nfs4_state_owner *sp;
 889         struct rb_node *pos;
 890         struct nfs4_state_recovery_ops *ops;
 891         struct rpc_cred *cred;
 892         int status = 0;
 893
 894         allow_signal(SIGKILL);
 895
 896         /* Ensure exclusive access to NFSv4 state */
 897         lock_kernel();
 898         down_write(&clp->cl_sem);
 899         /* Are there any NFS mounts out there? */
 900         if (list_empty(&clp->cl_superblocks))
 901                 goto out;
 902 restart_loop:
 903         ops = &nfs4_network_partition_recovery_ops;
 904         /* Are there any open files on this volume? */
 905         cred = nfs4_get_renew_cred(clp);
 906         if (cred != NULL) {
 907                 /* Yes there are: try to renew the old lease */
 908                 status = nfs4_proc_renew(clp, cred);
 909                 switch (status) {
 910                         case 0:
 911                         case -NFS4ERR_CB_PATH_DOWN:
 912                                 put_rpccred(cred);
 913                                 goto out;
 914                         case -NFS4ERR_STALE_CLIENTID:
 915                         case -NFS4ERR_LEASE_MOVED:
 916                                 ops = &nfs4_reboot_recovery_ops;
 917                 }
 918         } else {
 919                 /* "reboot" to ensure we clear all state on the server */
 920                 clp->cl_boot_time = CURRENT_TIME;
 921                 cred = nfs4_get_setclientid_cred(clp);
 922         }
 923         /* We're going to have to re-establish a clientid */
 924         nfs4_state_mark_reclaim(clp);
 925         status = -ENOENT;
 926         if (cred != NULL) {
 927                 status = nfs4_init_client(clp, cred);
 928                 put_rpccred(cred);
 929         }
 930         if (status)
 931                 goto out_error;
 932         /* Mark all delegations for reclaim */
 933         nfs_delegation_mark_reclaim(clp);
 934         /* Note: list is protected by exclusive lock on cl->cl_sem */
 935         for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) {
 936                 sp = rb_entry(pos, struct nfs4_state_owner, so_client_node);
 937                 status = nfs4_reclaim_open_state(ops, sp);
 938                 if (status < 0) {
 939                         if (status == -NFS4ERR_NO_GRACE) {
 940                                 ops = &nfs4_network_partition_recovery_ops;
 941                                 status = nfs4_reclaim_open_state(ops, sp);
 942                         }
 943                         if (status == -NFS4ERR_STALE_CLIENTID)
 944                                 goto restart_loop;
 945                         if (status == -NFS4ERR_EXPIRED)
 946                                 goto restart_loop;
 947                 }
 948         }
 949         nfs_delegation_reap_unclaimed(clp);
 950 out:
 951         up_write(&clp->cl_sem);
 952         unlock_kernel();
 953         if (status == -NFS4ERR_CB_PATH_DOWN)
 954                 nfs_handle_cb_pathdown(clp);
 955         nfs4_clear_recover_bit(clp);
 956         nfs_put_client(clp);
 957         module_put_and_exit(0);
 958         return 0;
 959 out_error:
 960         printk(KERN_WARNING "Error: state recovery failed on NFSv4 server %u.%u.%u.%u with error %d\n",
 961                                 NIPQUAD(clp->cl_addr.sin_addr), -status);
 962         set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
 963         goto out;
 964 }
 965
 966 /*
 967  * Local variables:
 968  *  c-basic-offset: 8
 969  * End:
 970  */