/*
 * linux/fs/nfs/write.c
 *
 * Writing file data over NFS.
 *
 * We do it like this: When a (user) process wishes to write data to an
 * NFS file, a write request is allocated that contains the RPC task data
 * plus some info on the page to be written, and added to the inode's
 * write chain. If the process writes past the end of the page, an async
 * RPC call to write the page is scheduled immediately; otherwise, the call
 * is delayed for a few seconds.
 *
 * Just like readahead, no async I/O is performed if wsize < PAGE_SIZE.
 *
 * Write requests are kept on the inode's writeback list. Each entry in
 * that list references the page (portion) to be written. When the
 * cache timeout has expired, the RPC task is woken up, and tries to
 * lock the page. As soon as it manages to do so, the request is moved
 * from the writeback list to the writelock list.
 *
 * Note: we must make sure never to confuse the inode passed in the
 * write_page request with the one in page->inode. As far as I understand
 * it, these are different when doing a swap-out.
 *
 * To understand everything that goes on here and in the NFS read code,
 * one should be aware that a page is locked in exactly one of the following
 * cases:
 *
 *  - A write request is in progress.
 *  - A user process is in generic_file_write/nfs_update_page
 *  - A user process is in generic_file_read
 *
 * Also note that because of the way pages are invalidated in
 * nfs_revalidate_inode, the following assertions hold:
 *
 *  - If a page is dirty, there will be no read requests (a page will
 *    not be re-read unless invalidated by nfs_revalidate_inode).
 *  - If the page is not uptodate, there will be no pending write
 *    requests, and no process will be in nfs_update_page.
 *
 * FIXME: Interaction with the vmscan routines is not optimal yet.
 * Either vmscan must be made nfs-savvy, or we need a different page
 * reclaim concept that supports something like FS-independent
 * buffer_heads with a b_ops-> field.
 *
 * Copyright (C) 1996, 1997, Olaf Kirch <okir@monad.swb.de>
 */
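/*
 * Life cycle of a request, as implemented by the routines below:
 *
 *      create_write_request    allocate an nfs_wreq and queue it on the
 *                              inode's NFS_WRITEBACK list
 *      schedule_write_request  sync: execute the RPC call right away;
 *                              async: sleep on write_queue for
 *                              NFS_WRITEBACK_DELAY ticks
 *      nfs_wback_lock          take the page lock (NFS_WRITE_WANTLOCK ->
 *                              NFS_WRITE_LOCKED), set up the WRITE call,
 *                              and mark the request NFS_WRITE_INPROGRESS
 *      nfs_wback_result        mark the request NFS_WRITE_COMPLETE, unlock
 *                              the page, and either free the request or
 *                              park it on the failed list for later
 *                              error reporting.
 */
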
#include <linux/config.h>
#include <linux/types.h>
#include <linux/malloc.h>
#include <linux/swap.h>
#include <linux/pagemap.h>

#include <linux/sunrpc/clnt.h>
#include <linux/nfs_fs.h>
#include <asm/uaccess.h>

/*
 * NOTE! We must NOT default to soft-mounting: that breaks too many
 * programs that depend on POSIX behaviour of uninterruptible reads
 * and writes.
 *
 * Until we have a per-mount soft/hard mount policy that we can honour
 * we must default to hard mounting!
 *
 * And yes, this should be "interruptible", not soft.
 */
#define IS_SOFT 0

#define NFS_PARANOIA 1
#define NFSDBG_FACILITY         NFSDBG_PAGECACHE

static void nfs_wback_lock(struct rpc_task *task);
static void nfs_wback_result(struct rpc_task *task);

/*
 * Cache parameters
 */
#define NFS_WRITEBACK_DELAY     (10 * HZ)
#define NFS_WRITEBACK_MAX       64

/*
 * Limit number of delayed writes
 */
static int nr_write_requests = 0;
static int nr_failed_requests = 0;
static struct rpc_wait_queue write_queue = RPC_INIT_WAITQ("write_chain");
struct nfs_wreq *nfs_failed_requests = NULL;

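/*
 * Requests that failed on the server are parked on nfs_failed_requests
 * instead of being freed, so that the error can later be returned to the
 * writing process (and the entry released) by nfs_check_error, or swept
 * up by nfs_check_failed_request.
 */
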
/* Hack for future NFS swap support */
#ifndef IS_SWAPFILE
# define IS_SWAPFILE(inode)     (0)
#endif

/*
 * Unlock a page after writing it
 */
static inline void
nfs_unlock_page(struct page *page)
{
        dprintk("NFS: unlock %ld\n", page->offset);
        clear_bit(PG_locked, &page->flags);
        wake_up(&page->wait);

#ifdef CONFIG_NFS_SWAP
        /* async swap-out support */
        if (test_and_clear_bit(PG_decr_after, &page->flags))
                atomic_dec(&page->count);
        if (test_and_clear_bit(PG_swap_unlock_after, &page->flags)) {
                /*
                 * We're doing a swap, so check that this page is
                 * swap-cached and do the necessary cleanup.
                 */
                swap_after_unlock_page(page->offset);
        }
#endif
}

/*
 * Transfer a page lock to a write request waiting for it.
 */
static inline void
transfer_page_lock(struct nfs_wreq *req)
{
        dprintk("NFS: transfer_page_lock\n");

        req->wb_flags &= ~NFS_WRITE_WANTLOCK;
        req->wb_flags |= NFS_WRITE_LOCKED;
        rpc_wake_up_task(&req->wb_task);

        dprintk("NFS: wake up task %d (flags %x)\n",
                req->wb_task.tk_pid, req->wb_flags);
}

/*
 * Write a page synchronously.
 * Offset is the data offset within the page.
 */
static int
nfs_writepage_sync(struct dentry *dentry, struct inode *inode,
                struct page *page, unsigned long offset, unsigned int count)
{
        unsigned int wsize = NFS_SERVER(inode)->wsize;
        int result, refresh = 0, written = 0;
        u8 *buffer;
        struct nfs_fattr fattr;

        dprintk("NFS: nfs_writepage_sync(%s/%s %d@%ld)\n",
                dentry->d_parent->d_name.name, dentry->d_name.name,
                count, page->offset + offset);

        buffer = (u8 *) page_address(page) + offset;
        offset += page->offset;

        do {
                if (count < wsize && !IS_SWAPFILE(inode))
                        wsize = count;

                result = nfs_proc_write(NFS_DSERVER(dentry), NFS_FH(dentry),
                                        IS_SWAPFILE(inode), offset, wsize,
                                        buffer, &fattr);

                if (result < 0) {
                        /* Must mark the page invalid after I/O error */
                        clear_bit(PG_uptodate, &page->flags);
                        goto io_error;
                }
                if (result != wsize)
                        printk("NFS: short write, wsize=%u, result=%d\n",
                                wsize, result);
                refresh = 1;
                buffer  += wsize;
                offset  += wsize;
                written += wsize;
                count   -= wsize;
                /*
                 * If we've extended the file, update the inode
                 * now so we don't invalidate the cache.
                 */
                if (offset > inode->i_size)
                        inode->i_size = offset;
        } while (count);

io_error:
        /* Note: we don't refresh if the call failed (fattr invalid) */
        if (refresh && result >= 0) {
                /* See comments in nfs_wback_result */
                /* N.B. I don't think this is right -- sync writes in order */
                if (fattr.size < inode->i_size)
                        fattr.size = inode->i_size;
                if (fattr.mtime.seconds < inode->i_mtime)
                        printk("nfs_writepage_sync: prior time??\n");
                /* Solaris 2.5 server seems to send garbled
                 * fattrs occasionally */
                if (inode->i_ino == fattr.fileid) {
                        /*
                         * We expect the mtime value to change, and
                         * don't want to invalidate the caches.
                         */
                        inode->i_mtime = fattr.mtime.seconds;
                        nfs_refresh_inode(inode, &fattr);
                }
                else
                        printk("nfs_writepage_sync: inode %ld, got %u?\n",
                                inode->i_ino, fattr.fileid);
        }

        nfs_unlock_page(page);
        return written? written : result;
}

/*
 * Append a writeback request to a list
 */
static inline void
append_write_request(struct nfs_wreq **q, struct nfs_wreq *wreq)
{
        dprintk("NFS: append_write_request(%p, %p)\n", q, wreq);
        rpc_append_list(q, wreq);
}

/*
 * Remove a writeback request from a list
 */
static inline void
remove_write_request(struct nfs_wreq **q, struct nfs_wreq *wreq)
{
        dprintk("NFS: remove_write_request(%p, %p)\n", q, wreq);
        rpc_remove_list(q, wreq);
}

/*
 * Find a write request for a given page
 */
static inline struct nfs_wreq *
find_write_request(struct inode *inode, struct page *page)
{
        struct nfs_wreq *head, *req;

        dprintk("NFS: find_write_request(%x/%ld, %p)\n",
                inode->i_dev, inode->i_ino, page);
        if (!(req = head = NFS_WRITEBACK(inode)))
                return NULL;
        do {
                if (req->wb_page == page)
                        return req;
        } while ((req = WB_NEXT(req)) != head);
        return NULL;
}

/*
 * Find any requests for the specified dentry.
 */
int
nfs_find_dentry_request(struct inode *inode, struct dentry *dentry)
{
        struct nfs_wreq *head, *req;
        int found = 0;

        req = head = NFS_WRITEBACK(inode);
        while (req != NULL) {
                if (req->wb_dentry == dentry) {
                        found = 1;
                        break;
                }
                if ((req = WB_NEXT(req)) == head)
                        break;
        }
        return found;
}

/*
 * Find a failed write request by pid
 */
static struct nfs_wreq *
find_failed_request(struct inode *inode, pid_t pid)
{
        struct nfs_wreq *head, *req;

        req = head = nfs_failed_requests;
        while (req != NULL) {
                if (req->wb_inode == inode && (pid == 0 || req->wb_pid == pid))
                        return req;
                if ((req = WB_NEXT(req)) == head)
                        break;
        }
        return NULL;
}

/*
 * Add a request to the failed list.
 */
static void
append_failed_request(struct nfs_wreq * req)
{
        static int old_max = 16;

        append_write_request(&nfs_failed_requests, req);
        nr_failed_requests++;
        if (nr_failed_requests >= old_max) {
                printk("NFS: %d failed requests\n", nr_failed_requests);
                old_max = old_max << 1;
        }
}

/*
 * Remove a request from the failed list and free it.
 */
static void
remove_failed_request(struct nfs_wreq * req)
{
        remove_write_request(&nfs_failed_requests, req);
        kfree(req);
        nr_failed_requests--;
}

/*
 * Find and release all failed requests for this inode.
 */
int
nfs_check_failed_request(struct inode * inode)
{
        struct nfs_wreq * req;
        int found = 0;

        while ((req = find_failed_request(inode, 0)) != NULL) {
                remove_failed_request(req);
                found++;
        }
        return found;
}

/*
 * Try to merge adjacent write requests. This works only for requests
 * issued by the same user.
 */
static inline int
update_write_request(struct nfs_wreq *req, unsigned int first,
                        unsigned int bytes)
{
        unsigned int rqfirst = req->wb_offset,
                     rqlast = rqfirst + req->wb_bytes,
                     last = first + bytes;

        dprintk("nfs: trying to update write request %p\n", req);

        /* Check the credentials associated with this write request.
         * If the buffer is owned by the same user, we can happily
         * add our data without risking server permission problems.
         * Note that I'm not messing around with RPC root override creds
         * here, because they're used by swap requests only which
         * always write out full pages. */
        if (!rpcauth_matchcred(&req->wb_task, req->wb_task.tk_cred)) {
                dprintk("NFS: update failed (cred mismatch)\n");
                return 0;
        }

        if (first < rqfirst)
                rqfirst = first;
        if (rqlast < last)
                rqlast = last;
        req->wb_offset = rqfirst;
        req->wb_bytes  = rqlast - rqfirst;

        return 1;
}

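/*
 * Note: a successful merge above extends the request to cover the union
 * of the old and the new byte range; any bytes lying in between are
 * transmitted from the (now dirty) page as well.
 */
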
/*
 * Create and initialize a writeback request
 */
static inline struct nfs_wreq *
create_write_request(struct dentry *dentry, struct inode *inode,
                struct page *page, unsigned int offset, unsigned int bytes)
{
        struct rpc_clnt *clnt = NFS_CLIENT(inode);
        struct nfs_wreq *wreq;
        struct rpc_task *task;

        dprintk("NFS: create_write_request(%s/%s, %ld+%d)\n",
                dentry->d_parent->d_name.name, dentry->d_name.name,
                page->offset + offset, bytes);

        /* FIXME: Enforce hard limit on number of concurrent writes? */

        wreq = (struct nfs_wreq *) kmalloc(sizeof(*wreq), GFP_USER);
        if (!wreq)
                goto out_fail;
        memset(wreq, 0, sizeof(*wreq));

        task = &wreq->wb_task;
        rpc_init_task(task, clnt, nfs_wback_result, RPC_TASK_NFSWRITE);
        task->tk_calldata = wreq;
        task->tk_action = nfs_wback_lock;

        rpcauth_lookupcred(task);       /* Obtain user creds */
        if (task->tk_status < 0)
                goto out_req;

        /* Put the task on inode's writeback request list. */
        wreq->wb_dentry = dentry;
        wreq->wb_inode  = inode;
        wreq->wb_pid    = current->pid;
        wreq->wb_page   = page;
        wreq->wb_offset = offset;
        wreq->wb_bytes  = bytes;

        atomic_inc(&page->count);

        append_write_request(&NFS_WRITEBACK(inode), wreq);

        if (nr_write_requests++ > NFS_WRITEBACK_MAX*3/4)
                rpc_wake_up_next(&write_queue);

        return wreq;

out_req:
        rpc_release_task(task);
        kfree(wreq);
out_fail:
        return NULL;
}

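/*
 * The nr_write_requests test above starts flushing early: once more than
 * three quarters of NFS_WRITEBACK_MAX requests are outstanding, the next
 * task sleeping on write_queue is woken immediately instead of waiting
 * out its NFS_WRITEBACK_DELAY.
 */
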
/*
 * Schedule a writeback RPC call.
 * If the server is congested, don't add to our backlog of queued
 * requests but call it synchronously.
 * The function returns false if the page has been unlocked as the
 * consequence of a synchronous write call.
 *
 * FIXME: Here we could walk the inode's lock list to see whether the
 * page we're currently writing to has been write-locked by the caller.
 * If it is, we could schedule an async write request with a long
 * delay in order to avoid writing back the page until the lock is
 * released.
 */
static inline int
schedule_write_request(struct nfs_wreq *req, int sync)
{
        struct rpc_task *task = &req->wb_task;
        struct inode *inode = req->wb_inode;

        if (NFS_CONGESTED(inode) || nr_write_requests >= NFS_WRITEBACK_MAX)
                sync = 1;

        if (sync) {
                sigset_t oldmask;
                struct rpc_clnt *clnt = NFS_CLIENT(inode);
                dprintk("NFS: %4d schedule_write_request (sync)\n",
                        task->tk_pid);
                /* Page is already locked */
                req->wb_flags |= NFS_WRITE_LOCKED;
                rpc_clnt_sigmask(clnt, &oldmask);
                rpc_execute(task);
                rpc_clnt_sigunmask(clnt, &oldmask);
        } else {
                dprintk("NFS: %4d schedule_write_request (async)\n",
                        task->tk_pid);
                task->tk_flags |= RPC_TASK_ASYNC;
                task->tk_timeout = NFS_WRITEBACK_DELAY;
                rpc_sleep_on(&write_queue, task, NULL, NULL);
        }

        return sync == 0;
}

/*
 * Wait for request to complete
 * This is almost a copy of __wait_on_page
 */
static inline int
wait_on_write_request(struct nfs_wreq *req)
{
        struct wait_queue wait = { current, NULL };
        struct page *page = req->wb_page;
        int retval;
        sigset_t oldmask;
        struct rpc_clnt *clnt = NFS_CLIENT(req->wb_inode);

        rpc_clnt_sigmask(clnt, &oldmask);
        add_wait_queue(&page->wait, &wait);
        atomic_inc(&page->count);
        for (;;) {
                current->state = TASK_INTERRUPTIBLE;
                retval = 0;
                if (!PageLocked(page))
                        break;
                retval = -ERESTARTSYS;
                if (signalled())
                        break;
                schedule();
        }
        remove_wait_queue(&page->wait, &wait);
        current->state = TASK_RUNNING;
        /* N.B. page may have been unused, so we must use free_page() */
        free_page(page_address(page));
        rpc_clnt_sigunmask(clnt, &oldmask);
        return retval;
}

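/*
 * Note on the page reference handling above: the page count is bumped
 * before sleeping so the page cannot go away while we wait, and
 * free_page() (rather than a bare count decrement) releases that
 * reference, freeing the page if we were its last user.
 */
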
/*
 * Write a page to the server. This will be used for NFS swapping only
 * (for now), and we currently do this synchronously only.
 */
int
nfs_writepage(struct file * file, struct page *page)
{
        struct dentry *dentry = file->f_dentry;
        return nfs_writepage_sync(dentry, dentry->d_inode, page, 0, PAGE_SIZE);
}

/*
 * Update and possibly write a cached page of an NFS file.
 *
 * XXX: Keep an eye on generic_file_read to make sure it doesn't do bad
 * things with a page scheduled for an RPC call (e.g. invalidate it).
 */
int
nfs_updatepage(struct file *file, struct page *page, const char *buffer,
                unsigned long offset, unsigned int count, int sync)
{
        struct dentry *dentry = file->f_dentry;
        struct inode *inode = dentry->d_inode;
        u8 *page_addr = (u8 *) page_address(page);
        struct nfs_wreq *req;
        int status = 0, page_locked = 1;

        dprintk("NFS: nfs_updatepage(%s/%s %d@%ld, sync=%d)\n",
                dentry->d_parent->d_name.name, dentry->d_name.name,
                count, page->offset+offset, sync);

        set_bit(PG_locked, &page->flags);

        /*
         * Try to find a corresponding request on the writeback queue.
         * If there is one, we can be sure that this request is not
         * yet being processed, because we hold a lock on the page.
         *
         * If the request was created by us, update it. Otherwise,
         * transfer the page lock and flush out the dirty page now.
         * After returning, generic_file_write will wait on the
         * page and retry the update.
         */
        if ((req = find_write_request(inode, page)) != NULL) {
                if (update_write_request(req, offset, count)) {
                        /* N.B. check for a fault here and cancel the req */
                        /*
                         * SECURITY - copy_from_user must zero the
                         * rest of the data after a fault!
                         */
                        copy_from_user(page_addr + offset, buffer, count);
                        goto updated;
                }
                dprintk("NFS: wake up conflicting write request.\n");
                transfer_page_lock(req);
                return 0;
        }

        /* Copy data to page buffer. */
        status = -EFAULT;
        if (copy_from_user(page_addr + offset, buffer, count))
                goto done;

        /* If wsize is smaller than page size, update and write
         * page synchronously.
         */
        if (NFS_SERVER(inode)->wsize < PAGE_SIZE)
                return nfs_writepage_sync(dentry, inode, page, offset, count);

        /* Create the write request. */
        status = -ENOBUFS;
        req = create_write_request(dentry, inode, page, offset, count);
        if (!req)
                goto done;

        /* Schedule request */
        page_locked = schedule_write_request(req, sync);

updated:
        /*
         * If we wrote up to the end of the chunk, transmit request now.
         * We should be a bit more intelligent about detecting whether a
         * process accesses the file sequentially or not.
         */
        if (page_locked && (offset + count >= PAGE_SIZE || sync))
                req->wb_flags |= NFS_WRITE_WANTLOCK;

        /* If the page was written synchronously, return any error that
         * may have happened; otherwise return the write count. */
        if (page_locked || (status = nfs_write_error(inode)) >= 0)
                status = count;

done:
        /* Unlock page and wake up anyone sleeping on it */
        if (page_locked) {
                if (req && WB_WANTLOCK(req)) {
                        transfer_page_lock(req);
                        /* rpc_execute(&req->wb_task); */
                        if (sync) {
                                /* N.B. if signalled, result not ready? */
                                wait_on_write_request(req);
                                if ((count = nfs_write_error(inode)) < 0)
                                        status = count;
                        }
                } else {
                        if (status < 0) {
                                printk("NFS: %s/%s write failed, clearing bit\n",
                                        dentry->d_parent->d_name.name,
                                        dentry->d_name.name);
                                clear_bit(PG_uptodate, &page->flags);
                        }
                        nfs_unlock_page(page);
                }
        }

        dprintk("NFS: nfs_updatepage returns %d (isize %ld)\n",
                status, inode->i_size);
        return status;
}

/*
 * Flush out a dirty page.
 */
static void
nfs_flush_request(struct nfs_wreq *req)
{
        struct page *page = req->wb_page;

#ifdef NFS_DEBUG_VERBOSE
        if (req->wb_inode != page->inode)
                printk("NFS: inode %ld no longer has page %p\n",
                        req->wb_inode->i_ino, page);
#endif
        dprintk("NFS: nfs_flush_request(%s/%s, @%ld)\n",
                req->wb_dentry->d_parent->d_name.name,
                req->wb_dentry->d_name.name, page->offset);

        req->wb_flags |= NFS_WRITE_WANTLOCK;
        if (!test_and_set_bit(PG_locked, &page->flags)) {
                transfer_page_lock(req);
        } else {
                printk(KERN_WARNING "NFS oops in %s: can't lock page!\n",
                        __FUNCTION__);
                rpc_wake_up_task(&req->wb_task);
        }
}

/*
 * Flush writeback requests. See nfs_flush_dirty_pages for details.
 */
static struct nfs_wreq *
nfs_flush_pages(struct inode *inode, pid_t pid, off_t offset, off_t len,
                        int invalidate)
{
        struct nfs_wreq *head, *req, *last = NULL;
        off_t rqoffset, rqend, end;

        end = len? offset + len : 0x7fffffffUL;

        req = head = NFS_WRITEBACK(inode);
        while (req != NULL) {
                dprintk("NFS: %4d nfs_flush inspect %s/%s @%ld fl %x\n",
                        req->wb_task.tk_pid,
                        req->wb_dentry->d_parent->d_name.name,
                        req->wb_dentry->d_name.name,
                        req->wb_page->offset, req->wb_flags);

                rqoffset = req->wb_page->offset + req->wb_offset;
                rqend    = rqoffset + req->wb_bytes;
                if (rqoffset < end && offset < rqend &&
                    (pid == 0 || req->wb_pid == pid)) {
                        if (!WB_INPROGRESS(req) && !WB_HAVELOCK(req)) {
#ifdef NFS_DEBUG_VERBOSE
                                printk("nfs_flush: flushing inode=%ld, %d @ %lu\n",
                                        req->wb_inode->i_ino, req->wb_bytes, rqoffset);
#endif
                                nfs_flush_request(req);
                        }
                        last = req;
                }
                if (invalidate)
                        req->wb_flags |= NFS_WRITE_INVALIDATE;
                if ((req = WB_NEXT(req)) == head)
                        break;
        }

        return last;
}

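/*
 * With len == 0, the 0x7fffffffUL end offset above makes the range reach
 * from 'offset' to the end of the file; this is how nfs_invalidate_pages
 * and nfs_flush_dirty_pages cover a whole inode.
 */
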
/*
 * Cancel a write request. We always mark it cancelled,
 * but if it's already in progress there's no point in
 * calling rpc_exit, and we don't want to overwrite the
 * tk_status field.
 */
static void
nfs_cancel_request(struct nfs_wreq *req)
{
        req->wb_flags |= NFS_WRITE_CANCELLED;
        if (!WB_INPROGRESS(req)) {
                rpc_exit(&req->wb_task, 0);
                rpc_wake_up_task(&req->wb_task);
        }
}

/*
 * Cancel all writeback requests, both pending and in progress.
 */
static void
nfs_cancel_dirty(struct inode *inode, pid_t pid)
{
        struct nfs_wreq *head, *req;

        req = head = NFS_WRITEBACK(inode);
        while (req != NULL) {
                if (pid == 0 || req->wb_pid == pid)
                        nfs_cancel_request(req);
                if ((req = WB_NEXT(req)) == head)
                        break;
        }
}

/*
 * Flush out all dirty pages belonging to a certain user process and
 * maybe wait for the RPC calls to complete.
 *
 * Another purpose of this function is sync()ing a file range before a
 * write lock is released. This is what offset and length are for, even if
 * this isn't used by the nlm module yet.
 */
int
nfs_flush_dirty_pages(struct inode *inode, pid_t pid, off_t offset, off_t len)
{
        struct nfs_wreq *last = NULL;
        int result = 0, cancel = 0;

        dprintk("NFS: flush_dirty_pages(%x/%ld for pid %d %ld/%ld)\n",
                inode->i_dev, inode->i_ino, current->pid, offset, len);

        if (IS_SOFT && signalled()) {
                nfs_cancel_dirty(inode, pid);
                cancel = 1;
        }

        for (;;) {
                if (IS_SOFT && signalled()) {
                        if (!cancel)
                                nfs_cancel_dirty(inode, pid);
                        result = -ERESTARTSYS;
                        break;
                }

                /* Flush all pending writes for the pid and file region */
                last = nfs_flush_pages(inode, pid, offset, len, 0);
                if (last == NULL)
                        break;
                wait_on_write_request(last);
        }

        return result;
}

/*
 * Flush out any pending write requests and flag that they be discarded
 * after the write is complete.
 *
 * This function is called from nfs_refresh_inode just before it calls
 * invalidate_inode_pages. After nfs_flush_pages returns, we can be sure
 * that all dirty pages are locked, so that invalidate_inode_pages does
 * not throw away any dirty pages.
 */
void
nfs_invalidate_pages(struct inode *inode)
{
        dprintk("NFS: nfs_invalidate_pages(%x/%ld)\n",
                inode->i_dev, inode->i_ino);

        nfs_flush_pages(inode, 0, 0, 0, 1);
}

/*
 * Cancel any pending write requests after a given offset
 * (called from nfs_notify_change).
 */
int
nfs_truncate_dirty_pages(struct inode *inode, unsigned long offset)
{
        struct nfs_wreq *req, *head;
        unsigned long rqoffset;

        dprintk("NFS: truncate_dirty_pages(%d/%ld, %ld)\n",
                inode->i_dev, inode->i_ino, offset);

        req = head = NFS_WRITEBACK(inode);
        while (req != NULL) {
                rqoffset = req->wb_page->offset + req->wb_offset;

                if (rqoffset >= offset) {
                        nfs_cancel_request(req);
                } else if (rqoffset + req->wb_bytes >= offset) {
                        req->wb_bytes = offset - rqoffset;
                }
                if ((req = WB_NEXT(req)) == head)
                        break;
        }

        return 0;
}

/*
 * Check if a previous write operation returned an error
 */
int
nfs_check_error(struct inode *inode)
{
        struct nfs_wreq *req;
        int status = 0;

        dprintk("nfs: checking for write error inode %04x/%ld\n",
                inode->i_dev, inode->i_ino);

        req = find_failed_request(inode, current->pid);
        if (req) {
                dprintk("nfs: write error %d inode %04x/%ld\n",
                        req->wb_task.tk_status, inode->i_dev, inode->i_ino);

                status = req->wb_task.tk_status;
                remove_failed_request(req);
        }
        return status;
}

/*
 * The following procedures make up the writeback finite state machinery:
 *
 * 1.   Try to lock the page if not yet locked by us,
 *      set up the RPC call info, and pass to the call FSM.
 */
static void
nfs_wback_lock(struct rpc_task *task)
{
        struct nfs_wreq *req = (struct nfs_wreq *) task->tk_calldata;
        struct page *page = req->wb_page;
        struct dentry *dentry = req->wb_dentry;

        dprintk("NFS: %4d nfs_wback_lock (%s/%s, status=%d flags=%x)\n",
                task->tk_pid, dentry->d_parent->d_name.name,
                dentry->d_name.name, task->tk_status, req->wb_flags);

        if (!WB_HAVELOCK(req))
                req->wb_flags |= NFS_WRITE_WANTLOCK;

        if (WB_WANTLOCK(req) && test_and_set_bit(PG_locked, &page->flags))
                goto out_locked;
        req->wb_flags &= ~NFS_WRITE_WANTLOCK;
        req->wb_flags |=  NFS_WRITE_LOCKED;
        task->tk_status = 0;

        /* Setup the task struct for a writeback call */
        req->wb_args.fh     = NFS_FH(dentry);
        req->wb_args.offset = page->offset + req->wb_offset;
        req->wb_args.count  = req->wb_bytes;
        req->wb_args.buffer = (void *) (page_address(page) + req->wb_offset);

        rpc_call_setup(task, NFSPROC_WRITE, &req->wb_args, &req->wb_fattr, 0);

        req->wb_flags |= NFS_WRITE_INPROGRESS;
        return;

out_locked:
        printk("NFS: page already locked in writeback_lock!\n");
        task->tk_timeout = 2 * HZ;
        rpc_sleep_on(&write_queue, task, NULL, NULL);
        return;
}

/*
 * 2.   Collect the result
 */
static void
nfs_wback_result(struct rpc_task *task)
{
        struct nfs_wreq *req = (struct nfs_wreq *) task->tk_calldata;
        struct inode *inode = req->wb_inode;
        struct page *page = req->wb_page;
        int status = task->tk_status;

        dprintk("NFS: %4d nfs_wback_result (%s/%s, status=%d, flags=%x)\n",
                task->tk_pid, req->wb_dentry->d_parent->d_name.name,
                req->wb_dentry->d_name.name, status, req->wb_flags);

        /* Set the WRITE_COMPLETE flag, but leave WRITE_INPROGRESS set */
        req->wb_flags |= NFS_WRITE_COMPLETE;
        if (status < 0) {
                /*
                 * An error occurred. Report the error back to the
                 * application by adding the request to the failed
                 * requests list.
                 */
                if (find_failed_request(inode, req->wb_pid))
                        status = 0;
                clear_bit(PG_uptodate, &page->flags);
        } else if (!WB_CANCELLED(req)) {
                struct nfs_fattr *fattr = &req->wb_fattr;
                /* Update attributes as result of writeback.
                 * Beware: when UDP replies arrive out of order, we
                 * may end up overwriting a previous, bigger file size.
                 *
                 * When the file size shrinks we cancel all pending
                 * writebacks.
                 */
                if (fattr->mtime.seconds >= inode->i_mtime) {
                        if (fattr->size < inode->i_size)
                                fattr->size = inode->i_size;

                        /* possible Solaris 2.5 server bug workaround */
                        if (inode->i_ino == fattr->fileid) {
                                /*
                                 * We expect these values to change, and
                                 * don't want to invalidate the caches.
                                 */
                                inode->i_size  = fattr->size;
                                inode->i_mtime = fattr->mtime.seconds;
                                nfs_refresh_inode(inode, fattr);
                        }
                        else
                                printk("nfs_wback_result: inode %ld, got %u?\n",
                                        inode->i_ino, fattr->fileid);
                }
        }

        /*
         * This call might block, so we defer removing the request
         * from the inode's writeback list.
         */
        rpc_release_task(task);

        if (WB_INVALIDATE(req))
                clear_bit(PG_uptodate, &page->flags);
        if (WB_HAVELOCK(req))
                nfs_unlock_page(page);

        /*
         * Now it's safe to remove the request from the inode's
         * writeback list and wake up any tasks sleeping on it.
         * If the request failed, add it to the failed list.
         */
        remove_write_request(&NFS_WRITEBACK(inode), req);

        if (status >= 0)
                kfree(req);
        else {
                dprintk("NFS: %4d saving write failure code\n", task->tk_pid);
                append_failed_request(req);
        }

        free_page(page_address(page));
        nr_write_requests--;
}