/*
 * linux/fs/nfs/write.c
 *
 * Writing file data over NFS.
 *
 * We do it like this: When a (user) process wishes to write data to an
 * NFS file, a write request is allocated that contains the RPC task data
 * plus some info on the page to be written, and added to the inode's
 * write chain. If the process writes past the end of the page, an async
 * RPC call to write the page is scheduled immediately; otherwise, the call
 * is delayed for a few seconds.
 *
 * Just like readahead, no async I/O is performed if wsize < PAGE_SIZE.
 *
 * Write requests are kept on the inode's writeback list. Each entry in
 * that list references the page (portion) to be written. When the
 * cache timeout has expired, the RPC task is woken up, and tries to
 * lock the page. As soon as it manages to do so, the request is moved
 * from the writeback list to the writelock list.
 *
 * Note: we must make sure never to confuse the inode passed in the
 * write_page request with the one in page->inode. As far as I understand
 * it, these are different when doing a swap-out.
 *
 * To understand everything that goes on here and in the NFS read code,
 * one should be aware that a page is locked in exactly one of the following
 * cases:
 *
 *  - A write request is in progress.
 *  - A user process is in generic_file_write/nfs_update_page
 *  - A user process is in generic_file_read
 *
 * Also note that because of the way pages are invalidated in
 * nfs_revalidate_inode, the following assertions hold:
 *
 *  - If a page is dirty, there will be no read requests (a page will
 *    not be re-read unless invalidated by nfs_revalidate_inode).
 *  - If the page is not uptodate, there will be no pending write
 *    requests, and no process will be in nfs_update_page.
 *
 * FIXME: Interaction with the vmscan routines is not optimal yet.
 * Either vmscan must be made nfs-savvy, or we need a different page
 * reclaim concept that supports something like FS-independent
 * buffer_heads with a b_ops-> field.
 *
 * Copyright (C) 1996, 1997, Olaf Kirch <okir@monad.swb.de>
 */
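/*
 * Life cycle of a request, as implemented by the routines below:
 *
 *      create_write_request    allocate an nfs_wreq and queue it on the
 *                              inode's NFS_WRITEBACK list
 *      schedule_write_request  sync: execute the RPC call right away;
 *                              async: sleep on write_queue for
 *                              NFS_WRITEBACK_DELAY ticks
 *      nfs_wback_lock          take the page lock (NFS_WRITE_WANTLOCK ->
 *                              NFS_WRITE_LOCKED), set up the WRITE call,
 *                              and mark the request NFS_WRITE_INPROGRESS
 *      nfs_wback_result        mark the request NFS_WRITE_COMPLETE, unlock
 *                              the page, and either free the request or
 *                              park it on the failed list for later
 *                              error reporting.
 */
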
#include <linux/config.h>
#include <linux/types.h>
#include <linux/malloc.h>
#include <linux/swap.h>
#include <linux/pagemap.h>

#include <linux/sunrpc/clnt.h>
#include <linux/nfs_fs.h>
#include <asm/uaccess.h>

/*
 * NOTE! We must NOT default to soft-mounting: that breaks too many
 * programs that depend on POSIX behaviour of uninterruptible reads
 * and writes.
 *
 * Until we have a per-mount soft/hard mount policy that we can honour
 * we must default to hard mounting!
 *
 * And yes, this should be "interruptible", not soft.
 */
#define IS_SOFT 0

#define NFS_PARANOIA 1
#define NFSDBG_FACILITY         NFSDBG_PAGECACHE

static void nfs_wback_lock(struct rpc_task *task);
static void nfs_wback_result(struct rpc_task *task);

/*
 * Cache parameters
 */
#define NFS_WRITEBACK_DELAY     (10 * HZ)
#define NFS_WRITEBACK_MAX       64

/*
 * Limit number of delayed writes
 */
static int nr_write_requests = 0;
static int nr_failed_requests = 0;
static struct rpc_wait_queue write_queue = RPC_INIT_WAITQ("write_chain");
struct nfs_wreq *nfs_failed_requests = NULL;

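/*
 * Requests that failed on the server are parked on nfs_failed_requests
 * instead of being freed, so that the error can later be returned to the
 * writing process (and the entry released) by nfs_check_error, or swept
 * up by nfs_check_failed_request.
 */
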
/* Hack for future NFS swap support */
#ifndef IS_SWAPFILE
# define IS_SWAPFILE(inode)     (0)
#endif

/*
 * Unlock a page after writing it
 */
static inline void
nfs_unlock_page(struct page *page)
{
        dprintk("NFS: unlock %ld\n", page->offset);
        clear_bit(PG_locked, &page->flags);
        wake_up(&page->wait);

#ifdef CONFIG_NFS_SWAP
        /* async swap-out support */
        if (test_and_clear_bit(PG_decr_after, &page->flags))
                atomic_dec(&page->count);
        if (test_and_clear_bit(PG_swap_unlock_after, &page->flags)) {
                /*
                 * We're doing a swap, so check that this page is
                 * swap-cached and do the necessary cleanup.
                 */
                swap_after_unlock_page(page->offset);
        }
#endif
}

/*
 * Transfer a page lock to a write request waiting for it.
 */
static inline void
transfer_page_lock(struct nfs_wreq *req)
{
        dprintk("NFS: transfer_page_lock\n");

        req->wb_flags &= ~NFS_WRITE_WANTLOCK;
        req->wb_flags |= NFS_WRITE_LOCKED;
        rpc_wake_up_task(&req->wb_task);

        dprintk("NFS: wake up task %d (flags %x)\n",
                req->wb_task.tk_pid, req->wb_flags);
}

/*
 * Write a page synchronously.
 * Offset is the data offset within the page.
 */
static int
nfs_writepage_sync(struct dentry *dentry, struct inode *inode,
                struct page *page, unsigned long offset, unsigned int count)
{
        unsigned int wsize = NFS_SERVER(inode)->wsize;
        int result, refresh = 0, written = 0;
        u8 *buffer;
        struct nfs_fattr fattr;

        dprintk("NFS: nfs_writepage_sync(%s/%s %d@%ld)\n",
                dentry->d_parent->d_name.name, dentry->d_name.name,
                count, page->offset + offset);

        buffer = (u8 *) page_address(page) + offset;
        offset += page->offset;

        do {
                if (count < wsize && !IS_SWAPFILE(inode))
                        wsize = count;

                result = nfs_proc_write(NFS_DSERVER(dentry), NFS_FH(dentry),
                                        IS_SWAPFILE(inode), offset, wsize,
                                        buffer, &fattr);

                if (result < 0) {
                        /* Must mark the page invalid after I/O error */
                        clear_bit(PG_uptodate, &page->flags);
                        goto io_error;
                }
                if (result != wsize)
                        printk("NFS: short write, wsize=%u, result=%d\n",
                                wsize, result);
                refresh = 1;
                buffer  += wsize;
                offset  += wsize;
                written += wsize;
                count   -= wsize;
                /*
                 * If we've extended the file, update the inode
                 * now so we don't invalidate the cache.
                 */
                if (offset > inode->i_size)
                        inode->i_size = offset;
        } while (count);

io_error:
        /* Note: we don't refresh if the call failed (fattr invalid) */
        if (refresh && result >= 0) {
                /* See comments in nfs_wback_result */
                /* N.B. I don't think this is right -- sync writes in order */
                if (fattr.size < inode->i_size)
                        fattr.size = inode->i_size;
                if (fattr.mtime.seconds < inode->i_mtime)
                        printk("nfs_writepage_sync: prior time??\n");
                /* Solaris 2.5 server seems to send garbled
                 * fattrs occasionally */
                if (inode->i_ino == fattr.fileid) {
                        /*
                         * We expect the mtime value to change, and
                         * don't want to invalidate the caches.
                         */
                        inode->i_mtime = fattr.mtime.seconds;
                        nfs_refresh_inode(inode, &fattr);
                }
                else
                        printk("nfs_writepage_sync: inode %ld, got %u?\n",
                                inode->i_ino, fattr.fileid);
        }

        nfs_unlock_page(page);
        return written? written : result;
}

/*
 * Append a writeback request to a list
 */
static inline void
append_write_request(struct nfs_wreq **q, struct nfs_wreq *wreq)
{
        dprintk("NFS: append_write_request(%p, %p)\n", q, wreq);
        rpc_append_list(q, wreq);
}

/*
 * Remove a writeback request from a list
 */
static inline void
remove_write_request(struct nfs_wreq **q, struct nfs_wreq *wreq)
{
        dprintk("NFS: remove_write_request(%p, %p)\n", q, wreq);
        rpc_remove_list(q, wreq);
}

/*
 * Find a write request for a given page
 */
static inline struct nfs_wreq *
find_write_request(struct inode *inode, struct page *page)
{
        struct nfs_wreq *head, *req;

        dprintk("NFS: find_write_request(%x/%ld, %p)\n",
                inode->i_dev, inode->i_ino, page);
        if (!(req = head = NFS_WRITEBACK(inode)))
                return NULL;
        do {
                if (req->wb_page == page)
                        return req;
        } while ((req = WB_NEXT(req)) != head);
        return NULL;
}

/*
 * Find any requests for the specified dentry.
 */
int
nfs_find_dentry_request(struct inode *inode, struct dentry *dentry)
{
        struct nfs_wreq *head, *req;
        int found = 0;

        req = head = NFS_WRITEBACK(inode);
        while (req != NULL) {
                if (req->wb_dentry == dentry) {
                        found = 1;
                        break;
                }
                if ((req = WB_NEXT(req)) == head)
                        break;
        }
        return found;
}

/*
 * Find a failed write request by pid
 */
static struct nfs_wreq *
find_failed_request(struct inode *inode, pid_t pid)
{
        struct nfs_wreq *head, *req;

        req = head = nfs_failed_requests;
        while (req != NULL) {
                if (req->wb_inode == inode && (pid == 0 || req->wb_pid == pid))
                        return req;
                if ((req = WB_NEXT(req)) == head)
                        break;
        }
        return NULL;
}

/*
 * Add a request to the failed list.
 */
static void
append_failed_request(struct nfs_wreq * req)
{
        static int old_max = 16;

        append_write_request(&nfs_failed_requests, req);
        nr_failed_requests++;
        if (nr_failed_requests >= old_max) {
                printk("NFS: %d failed requests\n", nr_failed_requests);
                old_max = old_max << 1;
        }
}

/*
 * Remove a request from the failed list and free it.
 */
static void
remove_failed_request(struct nfs_wreq * req)
{
        remove_write_request(&nfs_failed_requests, req);
        kfree(req);
        nr_failed_requests--;
}

/*
 * Find and release all failed requests for this inode.
 */
int
nfs_check_failed_request(struct inode * inode)
{
        struct nfs_wreq * req;
        int found = 0;

        while ((req = find_failed_request(inode, 0)) != NULL) {
                remove_failed_request(req);
                found++;
        }
        return found;
}

/*
 * Try to merge adjacent write requests. This works only for requests
 * issued by the same user.
 */
static inline int
update_write_request(struct nfs_wreq *req, unsigned int first,
                        unsigned int bytes)
{
        unsigned int rqfirst = req->wb_offset,
                     rqlast = rqfirst + req->wb_bytes,
                     last = first + bytes;

        dprintk("nfs: trying to update write request %p\n", req);

        /* Check the credentials associated with this write request.
         * If the buffer is owned by the same user, we can happily
         * add our data without risking server permission problems.
         * Note that I'm not messing around with RPC root override creds
         * here, because they're used by swap requests only which
         * always write out full pages. */
        if (!rpcauth_matchcred(&req->wb_task, req->wb_task.tk_cred)) {
                dprintk("NFS: update failed (cred mismatch)\n");
                return 0;
        }

        if (first < rqfirst)
                rqfirst = first;
        if (rqlast < last)
                rqlast = last;
        req->wb_offset = rqfirst;
        req->wb_bytes  = rqlast - rqfirst;

        return 1;
}

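/*
 * Note: a successful merge above extends the request to cover the union
 * of the old and the new byte range; any bytes lying in between are
 * transmitted from the (now dirty) page as well.
 */
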
/*
 * Create and initialize a writeback request
 */
static inline struct nfs_wreq *
create_write_request(struct dentry *dentry, struct inode *inode,
                struct page *page, unsigned int offset, unsigned int bytes)
{
        struct rpc_clnt *clnt = NFS_CLIENT(inode);
        struct nfs_wreq *wreq;
        struct rpc_task *task;

        dprintk("NFS: create_write_request(%s/%s, %ld+%d)\n",
                dentry->d_parent->d_name.name, dentry->d_name.name,
                page->offset + offset, bytes);

        /* FIXME: Enforce hard limit on number of concurrent writes? */

        wreq = (struct nfs_wreq *) kmalloc(sizeof(*wreq), GFP_USER);
        if (!wreq)
                goto out_fail;
        memset(wreq, 0, sizeof(*wreq));

        task = &wreq->wb_task;
        rpc_init_task(task, clnt, nfs_wback_result, RPC_TASK_NFSWRITE);
        task->tk_calldata = wreq;
        task->tk_action = nfs_wback_lock;

        rpcauth_lookupcred(task);       /* Obtain user creds */
        if (task->tk_status < 0)
                goto out_req;

        /* Put the task on inode's writeback request list. */
        wreq->wb_dentry = dentry;
        wreq->wb_inode  = inode;
        wreq->wb_pid    = current->pid;
        wreq->wb_page   = page;
        wreq->wb_offset = offset;
        wreq->wb_bytes  = bytes;

        atomic_inc(&page->count);

        append_write_request(&NFS_WRITEBACK(inode), wreq);

        if (nr_write_requests++ > NFS_WRITEBACK_MAX*3/4)
                rpc_wake_up_next(&write_queue);

        return wreq;

out_req:
        rpc_release_task(task);
        kfree(wreq);
out_fail:
        return NULL;
}

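/*
 * The nr_write_requests test above starts flushing early: once more than
 * three quarters of NFS_WRITEBACK_MAX requests are outstanding, the next
 * task sleeping on write_queue is woken immediately instead of waiting
 * out its NFS_WRITEBACK_DELAY.
 */
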
/*
 * Schedule a writeback RPC call.
 * If the server is congested, don't add to our backlog of queued
 * requests but call it synchronously.
 * The function returns false if the page has been unlocked as the
 * consequence of a synchronous write call.
 *
 * FIXME: Here we could walk the inode's lock list to see whether the
 * page we're currently writing to has been write-locked by the caller.
 * If it is, we could schedule an async write request with a long
 * delay in order to avoid writing back the page until the lock is
 * released.
 */
static inline int
schedule_write_request(struct nfs_wreq *req, int sync)
{
        struct rpc_task *task = &req->wb_task;
        struct inode *inode = req->wb_inode;

        if (NFS_CONGESTED(inode) || nr_write_requests >= NFS_WRITEBACK_MAX)
                sync = 1;

        if (sync) {
                sigset_t oldmask;
                struct rpc_clnt *clnt = NFS_CLIENT(inode);
                dprintk("NFS: %4d schedule_write_request (sync)\n",
                        task->tk_pid);
                /* Page is already locked */
                req->wb_flags |= NFS_WRITE_LOCKED;
                rpc_clnt_sigmask(clnt, &oldmask);
                rpc_execute(task);
                rpc_clnt_sigunmask(clnt, &oldmask);
        } else {
                dprintk("NFS: %4d schedule_write_request (async)\n",
                        task->tk_pid);
                task->tk_flags |= RPC_TASK_ASYNC;
                task->tk_timeout = NFS_WRITEBACK_DELAY;
                rpc_sleep_on(&write_queue, task, NULL, NULL);
        }

        return sync == 0;
}

/*
 * Wait for request to complete
 * This is almost a copy of __wait_on_page
 */
static inline int
wait_on_write_request(struct nfs_wreq *req)
{
        struct wait_queue wait = { current, NULL };
        struct page *page = req->wb_page;
        int retval;
        sigset_t oldmask;
        struct rpc_clnt *clnt = NFS_CLIENT(req->wb_inode);

        rpc_clnt_sigmask(clnt, &oldmask);
        add_wait_queue(&page->wait, &wait);
        atomic_inc(&page->count);
        for (;;) {
                current->state = TASK_INTERRUPTIBLE;
                retval = 0;
                if (!PageLocked(page))
                        break;
                retval = -ERESTARTSYS;
                if (signalled())
                        break;
                schedule();
        }
        remove_wait_queue(&page->wait, &wait);
        current->state = TASK_RUNNING;
        /* N.B. page may have been unused, so we must use free_page() */
        free_page(page_address(page));
        rpc_clnt_sigunmask(clnt, &oldmask);
        return retval;
}

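/*
 * Note on the page reference handling above: the page count is bumped
 * before sleeping so the page cannot go away while we wait, and
 * free_page() (rather than a bare count decrement) releases that
 * reference, freeing the page if we were its last user.
 */
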
/*
 * Write a page to the server. This will be used for NFS swapping only
 * (for now), and we currently do this synchronously only.
 */
int
nfs_writepage(struct file * file, struct page *page)
{
        struct dentry *dentry = file->f_dentry;
        return nfs_writepage_sync(dentry, dentry->d_inode, page, 0, PAGE_SIZE);
}

/*
 * Update and possibly write a cached page of an NFS file.
 *
 * XXX: Keep an eye on generic_file_read to make sure it doesn't do bad
 * things with a page scheduled for an RPC call (e.g. invalidate it).
 */
int
nfs_updatepage(struct file *file, struct page *page, const char *buffer,
                unsigned long offset, unsigned int count, int sync)
{
        struct dentry *dentry = file->f_dentry;
        struct inode *inode = dentry->d_inode;
        u8 *page_addr = (u8 *) page_address(page);
        struct nfs_wreq *req;
        int status = 0, page_locked = 1;

        dprintk("NFS: nfs_updatepage(%s/%s %d@%ld, sync=%d)\n",
                dentry->d_parent->d_name.name, dentry->d_name.name,
                count, page->offset+offset, sync);

        set_bit(PG_locked, &page->flags);

        /*
         * Try to find a corresponding request on the writeback queue.
         * If there is one, we can be sure that this request is not
         * yet being processed, because we hold a lock on the page.
         *
         * If the request was created by us, update it. Otherwise,
         * transfer the page lock and flush out the dirty page now.
         * After returning, generic_file_write will wait on the
         * page and retry the update.
         */
        if ((req = find_write_request(inode, page)) != NULL) {
                if (update_write_request(req, offset, count)) {
                        /* N.B. check for a fault here and cancel the req */
                        /*
                         * SECURITY - copy_from_user must zero the
                         * rest of the data after a fault!
                         */
                        copy_from_user(page_addr + offset, buffer, count);
                        goto updated;
                }
                dprintk("NFS: wake up conflicting write request.\n");
                transfer_page_lock(req);
                return 0;
        }

        /* Copy data to page buffer. */
        status = -EFAULT;
        if (copy_from_user(page_addr + offset, buffer, count))
                goto done;

        /* If wsize is smaller than page size, update and write
         * page synchronously.
         */
        if (NFS_SERVER(inode)->wsize < PAGE_SIZE)
                return nfs_writepage_sync(dentry, inode, page, offset, count);

        /* Create the write request. */
        status = -ENOBUFS;
        req = create_write_request(dentry, inode, page, offset, count);
        if (!req)
                goto done;

        /* Schedule request */
        page_locked = schedule_write_request(req, sync);

updated:
        /*
         * If we wrote up to the end of the chunk, transmit request now.
         * We should be a bit more intelligent about detecting whether a
         * process accesses the file sequentially or not.
         */
        if (page_locked && (offset + count >= PAGE_SIZE || sync))
                req->wb_flags |= NFS_WRITE_WANTLOCK;

        /* If the page was written synchronously, return any error that
         * may have happened; otherwise return the write count. */
        if (page_locked || (status = nfs_write_error(inode)) >= 0)
                status = count;

done:
        /* Unlock page and wake up anyone sleeping on it */
        if (page_locked) {
                if (req && WB_WANTLOCK(req)) {
                        transfer_page_lock(req);
                        /* rpc_execute(&req->wb_task); */
                        if (sync) {
                                /* N.B. if signalled, result not ready? */
                                wait_on_write_request(req);
                                if ((count = nfs_write_error(inode)) < 0)
                                        status = count;
                        }
                } else {
                        if (status < 0) {
                                printk("NFS: %s/%s write failed, clearing bit\n",
                                        dentry->d_parent->d_name.name,
                                        dentry->d_name.name);
                                clear_bit(PG_uptodate, &page->flags);
                        }
                        nfs_unlock_page(page);
                }
        }

        dprintk("NFS: nfs_updatepage returns %d (isize %ld)\n",
                status, inode->i_size);
        return status;
}

/*
 * Flush out a dirty page.
 */
static void
nfs_flush_request(struct nfs_wreq *req)
{
        struct page *page = req->wb_page;

#ifdef NFS_DEBUG_VERBOSE
        if (req->wb_inode != page->inode)
                printk("NFS: inode %ld no longer has page %p\n",
                        req->wb_inode->i_ino, page);
#endif
        dprintk("NFS: nfs_flush_request(%s/%s, @%ld)\n",
                req->wb_dentry->d_parent->d_name.name,
                req->wb_dentry->d_name.name, page->offset);

        req->wb_flags |= NFS_WRITE_WANTLOCK;
        if (!test_and_set_bit(PG_locked, &page->flags)) {
                transfer_page_lock(req);
        } else {
                printk(KERN_WARNING "NFS oops in %s: can't lock page!\n",
                        __FUNCTION__);
                rpc_wake_up_task(&req->wb_task);
        }
}

/*
 * Flush writeback requests. See nfs_flush_dirty_pages for details.
 */
static struct nfs_wreq *
nfs_flush_pages(struct inode *inode, pid_t pid, off_t offset, off_t len,
                        int invalidate)
{
        struct nfs_wreq *head, *req, *last = NULL;
        off_t rqoffset, rqend, end;

        end = len? offset + len : 0x7fffffffUL;

        req = head = NFS_WRITEBACK(inode);
        while (req != NULL) {
                dprintk("NFS: %4d nfs_flush inspect %s/%s @%ld fl %x\n",
                        req->wb_task.tk_pid,
                        req->wb_dentry->d_parent->d_name.name,
                        req->wb_dentry->d_name.name,
                        req->wb_page->offset, req->wb_flags);

                rqoffset = req->wb_page->offset + req->wb_offset;
                rqend    = rqoffset + req->wb_bytes;
                if (rqoffset < end && offset < rqend &&
                    (pid == 0 || req->wb_pid == pid)) {
                        if (!WB_INPROGRESS(req) && !WB_HAVELOCK(req)) {
#ifdef NFS_DEBUG_VERBOSE
                                printk("nfs_flush: flushing inode=%ld, %d @ %lu\n",
                                        req->wb_inode->i_ino, req->wb_bytes, rqoffset);
#endif
                                nfs_flush_request(req);
                        }
                        last = req;
                }
                if (invalidate)
                        req->wb_flags |= NFS_WRITE_INVALIDATE;
                if ((req = WB_NEXT(req)) == head)
                        break;
        }

        return last;
}

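/*
 * With len == 0, the 0x7fffffffUL end offset above makes the range reach
 * from 'offset' to the end of the file; this is how nfs_invalidate_pages
 * and nfs_flush_dirty_pages cover a whole inode.
 */
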
/*
 * Cancel a write request. We always mark it cancelled,
 * but if it's already in progress there's no point in
 * calling rpc_exit, and we don't want to overwrite the
 * tk_status field.
 */
static void
nfs_cancel_request(struct nfs_wreq *req)
{
        req->wb_flags |= NFS_WRITE_CANCELLED;
        if (!WB_INPROGRESS(req)) {
                rpc_exit(&req->wb_task, 0);
                rpc_wake_up_task(&req->wb_task);
        }
}

/*
 * Cancel all writeback requests, both pending and in progress.
 */
static void
nfs_cancel_dirty(struct inode *inode, pid_t pid)
{
        struct nfs_wreq *head, *req;

        req = head = NFS_WRITEBACK(inode);
        while (req != NULL) {
                if (pid == 0 || req->wb_pid == pid)
                        nfs_cancel_request(req);
                if ((req = WB_NEXT(req)) == head)
                        break;
        }
}

/*
 * Flush out all dirty pages belonging to a certain user process and
 * maybe wait for the RPC calls to complete.
 *
 * Another purpose of this function is sync()ing a file range before a
 * write lock is released. This is what offset and length are for, even if
 * this isn't used by the nlm module yet.
 */
int
nfs_flush_dirty_pages(struct inode *inode, pid_t pid, off_t offset, off_t len)
{
        struct nfs_wreq *last = NULL;
        int result = 0, cancel = 0;

        dprintk("NFS: flush_dirty_pages(%x/%ld for pid %d %ld/%ld)\n",
                inode->i_dev, inode->i_ino, current->pid, offset, len);

        if (IS_SOFT && signalled()) {
                nfs_cancel_dirty(inode, pid);
                cancel = 1;
        }

        for (;;) {
                if (IS_SOFT && signalled()) {
                        if (!cancel)
                                nfs_cancel_dirty(inode, pid);
                        result = -ERESTARTSYS;
                        break;
                }

                /* Flush all pending writes for the pid and file region */
                last = nfs_flush_pages(inode, pid, offset, len, 0);
                if (last == NULL)
                        break;
                wait_on_write_request(last);
        }

        return result;
}

/*
 * Flush out any pending write requests and flag that they be discarded
 * after the write is complete.
 *
 * This function is called from nfs_refresh_inode just before it calls
 * invalidate_inode_pages. After nfs_flush_pages returns, we can be sure
 * that all dirty pages are locked, so that invalidate_inode_pages does
 * not throw away any dirty pages.
 */
void
nfs_invalidate_pages(struct inode *inode)
{
        dprintk("NFS: nfs_invalidate_pages(%x/%ld)\n",
                inode->i_dev, inode->i_ino);

        nfs_flush_pages(inode, 0, 0, 0, 1);
}

/*
 * Cancel any pending write requests after a given offset
 * (called from nfs_notify_change).
 */
int
nfs_truncate_dirty_pages(struct inode *inode, unsigned long offset)
{
        struct nfs_wreq *req, *head;
        unsigned long rqoffset;

        dprintk("NFS: truncate_dirty_pages(%d/%ld, %ld)\n",
                inode->i_dev, inode->i_ino, offset);

        req = head = NFS_WRITEBACK(inode);
        while (req != NULL) {
                rqoffset = req->wb_page->offset + req->wb_offset;

                if (rqoffset >= offset) {
                        nfs_cancel_request(req);
                } else if (rqoffset + req->wb_bytes >= offset) {
                        req->wb_bytes = offset - rqoffset;
                }
                if ((req = WB_NEXT(req)) == head)
                        break;
        }

        return 0;
}

/*
 * Check if a previous write operation returned an error
 */
int
nfs_check_error(struct inode *inode)
{
        struct nfs_wreq *req;
        int status = 0;

        dprintk("nfs: checking for write error inode %04x/%ld\n",
                inode->i_dev, inode->i_ino);

        req = find_failed_request(inode, current->pid);
        if (req) {
                dprintk("nfs: write error %d inode %04x/%ld\n",
                        req->wb_task.tk_status, inode->i_dev, inode->i_ino);

                status = req->wb_task.tk_status;
                remove_failed_request(req);
        }
        return status;
}

/*
 * The following procedures make up the writeback finite state machinery:
 *
 * 1.   Try to lock the page if not yet locked by us,
 *      set up the RPC call info, and pass to the call FSM.
 */
static void
nfs_wback_lock(struct rpc_task *task)
{
        struct nfs_wreq *req = (struct nfs_wreq *) task->tk_calldata;
        struct page *page = req->wb_page;
        struct dentry *dentry = req->wb_dentry;

        dprintk("NFS: %4d nfs_wback_lock (%s/%s, status=%d flags=%x)\n",
                task->tk_pid, dentry->d_parent->d_name.name,
                dentry->d_name.name, task->tk_status, req->wb_flags);

        if (!WB_HAVELOCK(req))
                req->wb_flags |= NFS_WRITE_WANTLOCK;

        if (WB_WANTLOCK(req) && test_and_set_bit(PG_locked, &page->flags))
                goto out_locked;
        req->wb_flags &= ~NFS_WRITE_WANTLOCK;
        req->wb_flags |=  NFS_WRITE_LOCKED;
        task->tk_status = 0;

        /* Setup the task struct for a writeback call */
        req->wb_args.fh     = NFS_FH(dentry);
        req->wb_args.offset = page->offset + req->wb_offset;
        req->wb_args.count  = req->wb_bytes;
        req->wb_args.buffer = (void *) (page_address(page) + req->wb_offset);

        rpc_call_setup(task, NFSPROC_WRITE, &req->wb_args, &req->wb_fattr, 0);

        req->wb_flags |= NFS_WRITE_INPROGRESS;
        return;

out_locked:
        printk("NFS: page already locked in writeback_lock!\n");
        task->tk_timeout = 2 * HZ;
        rpc_sleep_on(&write_queue, task, NULL, NULL);
        return;
}

/*
 * 2.   Collect the result
 */
static void
nfs_wback_result(struct rpc_task *task)
{
        struct nfs_wreq *req = (struct nfs_wreq *) task->tk_calldata;
        struct inode *inode = req->wb_inode;
        struct page *page = req->wb_page;
        int status = task->tk_status;

        dprintk("NFS: %4d nfs_wback_result (%s/%s, status=%d, flags=%x)\n",
                task->tk_pid, req->wb_dentry->d_parent->d_name.name,
                req->wb_dentry->d_name.name, status, req->wb_flags);

        /* Set the WRITE_COMPLETE flag, but leave WRITE_INPROGRESS set */
        req->wb_flags |= NFS_WRITE_COMPLETE;
        if (status < 0) {
                /*
                 * An error occurred. Report the error back to the
                 * application by adding the request to the failed
                 * requests list.
                 */
                if (find_failed_request(inode, req->wb_pid))
                        status = 0;
                clear_bit(PG_uptodate, &page->flags);
        } else if (!WB_CANCELLED(req)) {
                struct nfs_fattr *fattr = &req->wb_fattr;
                /* Update attributes as result of writeback.
                 * Beware: when UDP replies arrive out of order, we
                 * may end up overwriting a previous, bigger file size.
                 *
                 * When the file size shrinks we cancel all pending
                 * writebacks.
                 */
                if (fattr->mtime.seconds >= inode->i_mtime) {
                        if (fattr->size < inode->i_size)
                                fattr->size = inode->i_size;

                        /* possible Solaris 2.5 server bug workaround */
                        if (inode->i_ino == fattr->fileid) {
                                /*
                                 * We expect these values to change, and
                                 * don't want to invalidate the caches.
                                 */
                                inode->i_size  = fattr->size;
                                inode->i_mtime = fattr->mtime.seconds;
                                nfs_refresh_inode(inode, fattr);
                        }
                        else
                                printk("nfs_wback_result: inode %ld, got %u?\n",
                                        inode->i_ino, fattr->fileid);
                }
        }

        /*
         * This call might block, so we defer removing the request
         * from the inode's writeback list.
         */
        rpc_release_task(task);

        if (WB_INVALIDATE(req))
                clear_bit(PG_uptodate, &page->flags);
        if (WB_HAVELOCK(req))
                nfs_unlock_page(page);

        /*
         * Now it's safe to remove the request from the inode's
         * writeback list and wake up any tasks sleeping on it.
         * If the request failed, add it to the failed list.
         */
        remove_write_request(&NFS_WRITEBACK(inode), req);

        if (status >= 0)
                kfree(req);
        else {
                dprintk("NFS: %4d saving write failure code\n", task->tk_pid);
                append_failed_request(req);
        }

        free_page(page_address(page));
        nr_write_requests--;
}