nfs: patches backported from 2.6 upstream
[tomato.git] / release / src / linux / linux / fs / nfs / write.c
bloba14a90aa217328c9afd1d214000f3040d2a99d69
1 /*
2 * linux/fs/nfs/write.c
4 * Writing file data over NFS.
6 * We do it like this: When a (user) process wishes to write data to an
7 * NFS file, a write request is allocated that contains the RPC task data
8 * plus some info on the page to be written, and added to the inode's
9 * write chain. If the process writes past the end of the page, an async
10 * RPC call to write the page is scheduled immediately; otherwise, the call
11 * is delayed for a few seconds.
13 * Just like readahead, no async I/O is performed if wsize < PAGE_SIZE.
15 * Write requests are kept on the inode's writeback list. Each entry in
16 * that list references the page (portion) to be written. When the
17 * cache timeout has expired, the RPC task is woken up, and tries to
18 * lock the page. As soon as it manages to do so, the request is moved
19 * from the writeback list to the writelock list.
21 * Note: we must make sure never to confuse the inode passed in the
22 * write_page request with the one in page->inode. As far as I understand
23 * it, these are different when doing a swap-out.
25 * To understand everything that goes on here and in the NFS read code,
26 * one should be aware that a page is locked in exactly one of the following
27 * cases:
29 * - A write request is in progress.
30 * - A user process is in generic_file_write/nfs_update_page
31 * - A user process is in generic_file_read
33 * Also note that because of the way pages are invalidated in
34 * nfs_revalidate_inode, the following assertions hold:
36 * - If a page is dirty, there will be no read requests (a page will
37 * not be re-read unless invalidated by nfs_revalidate_inode).
38 * - If the page is not uptodate, there will be no pending write
39 * requests, and no process will be in nfs_update_page.
41 * FIXME: Interaction with the vmscan routines is not optimal yet.
42 * Either vmscan must be made nfs-savvy, or we need a different page
43 * reclaim concept that supports something like FS-independent
44 * buffer_heads with a b_ops-> field.
46 * Copyright (C) 1996, 1997, Olaf Kirch <okir@monad.swb.de>
49 #include <linux/config.h>
50 #include <linux/types.h>
51 #include <linux/slab.h>
52 #include <linux/swap.h>
53 #include <linux/pagemap.h>
54 #include <linux/file.h>
56 #include <linux/sunrpc/clnt.h>
57 #include <linux/nfs_fs.h>
58 #include <linux/nfs_mount.h>
59 #include <linux/nfs_flushd.h>
60 #include <linux/nfs_page.h>
61 #include <asm/uaccess.h>
62 #include <linux/smp_lock.h>
64 #define NFSDBG_FACILITY NFSDBG_PAGECACHE
67 * Local structures
69 * This is the struct where the WRITE/COMMIT arguments go.
71 struct nfs_write_data {
72 struct rpc_task task;
73 struct inode *inode;
74 struct rpc_cred *cred;
75 struct nfs_writeargs args; /* argument struct */
76 struct nfs_writeres res; /* result struct */
77 struct nfs_fattr fattr;
78 struct nfs_writeverf verf;
79 struct list_head pages; /* Coalesced requests we wish to flush */
80 struct page *pagevec[NFS_WRITE_MAXIOV];
84 * Local function declarations
86 static struct nfs_page * nfs_update_request(struct file*, struct inode *,
87 struct page *,
88 unsigned int, unsigned int);
89 static void nfs_strategy(struct inode *inode);
90 static void nfs_writeback_done(struct rpc_task *);
91 #ifdef CONFIG_NFS_V3
92 static void nfs_commit_done(struct rpc_task *);
93 #endif
95 /* Hack for future NFS swap support */
96 #ifndef IS_SWAPFILE
97 # define IS_SWAPFILE(inode) (0)
98 #endif
100 static kmem_cache_t *nfs_wdata_cachep;
102 static __inline__ struct nfs_write_data *nfs_writedata_alloc(void)
104 struct nfs_write_data *p;
105 p = kmem_cache_alloc(nfs_wdata_cachep, SLAB_NOFS);
106 if (p) {
107 memset(p, 0, sizeof(*p));
108 INIT_LIST_HEAD(&p->pages);
109 p->args.pages = p->pagevec;
111 return p;
114 static __inline__ void nfs_writedata_free(struct nfs_write_data *p)
116 kmem_cache_free(nfs_wdata_cachep, p);
119 static void nfs_writedata_release(struct rpc_task *task)
121 struct nfs_write_data *wdata = (struct nfs_write_data *)task->tk_calldata;
122 nfs_writedata_free(wdata);
126 * Write a page synchronously.
127 * Offset is the data offset within the page.
129 static int
130 nfs_writepage_sync(struct file *file, struct inode *inode, struct page *page,
131 unsigned int offset, unsigned int count)
133 struct rpc_cred *cred = NULL;
134 loff_t base;
135 unsigned int wsize = NFS_SERVER(inode)->wsize;
136 int result, refresh = 0, written = 0, flags;
137 u8 *buffer;
138 struct nfs_fattr fattr;
139 struct nfs_writeverf verf;
142 if (file)
143 cred = get_rpccred(nfs_file_cred(file));
144 if (!cred)
145 cred = get_rpccred(NFS_I(inode)->mm_cred);
147 dprintk("NFS: nfs_writepage_sync(%x/%Ld %d@%Ld)\n",
148 inode->i_dev, (long long)NFS_FILEID(inode),
149 count, (long long)(page_offset(page) + offset));
151 base = page_offset(page) + offset;
153 flags = ((IS_SWAPFILE(inode)) ? NFS_RW_SWAP : 0) | NFS_RW_SYNC;
155 do {
156 if (count < wsize && !IS_SWAPFILE(inode))
157 wsize = count;
159 result = NFS_PROTO(inode)->write(inode, cred, &fattr, flags,
160 offset, wsize, page, &verf);
161 nfs_write_attributes(inode, &fattr);
163 if (result < 0) {
164 /* Must mark the page invalid after I/O error */
165 ClearPageUptodate(page);
166 goto io_error;
168 if (result != wsize)
169 printk("NFS: short write, wsize=%u, result=%d\n",
170 wsize, result);
171 refresh = 1;
172 buffer += wsize;
173 base += wsize;
174 offset += wsize;
175 written += wsize;
176 count -= wsize;
178 * If we've extended the file, update the inode
179 * now so we don't invalidate the cache.
181 if (base > inode->i_size)
182 inode->i_size = base;
183 } while (count);
185 if (PageError(page))
186 ClearPageError(page);
188 io_error:
189 if (cred)
190 put_rpccred(cred);
192 return written? written : result;
195 static int
196 nfs_writepage_async(struct file *file, struct inode *inode, struct page *page,
197 unsigned int offset, unsigned int count)
199 struct nfs_page *req;
200 loff_t end;
201 int status;
203 req = nfs_update_request(file, inode, page, offset, count);
204 status = (IS_ERR(req)) ? PTR_ERR(req) : 0;
205 if (status < 0)
206 goto out;
207 if (!req->wb_cred)
208 req->wb_cred = get_rpccred(NFS_I(inode)->mm_cred);
209 nfs_unlock_request(req);
210 nfs_strategy(inode);
211 end = ((loff_t)page->index<<PAGE_CACHE_SHIFT) + (loff_t)(offset + count);
212 if (inode->i_size < end)
213 inode->i_size = end;
215 out:
216 return status;
220 * Write an mmapped page to the server.
223 nfs_writepage(struct page *page)
225 struct inode *inode = page->mapping->host;
226 unsigned long end_index;
227 unsigned offset = PAGE_CACHE_SIZE;
228 int inode_referenced = 0;
229 int err;
232 * Note: We need to ensure that we have a reference to the inode
233 * if we are to do asynchronous writes. If not, waiting
234 * in nfs_wait_on_request() may deadlock with clear_inode().
236 * If igrab() fails here, then it is in any case safe to
237 * call nfs_wb_page(), since there will be no pending writes.
239 if (igrab(inode) != 0)
240 inode_referenced = 1;
241 end_index = inode->i_size >> PAGE_CACHE_SHIFT;
243 /* Ensure we've flushed out any previous writes */
244 nfs_wb_page(inode,page);
246 /* easy case */
247 if (page->index < end_index)
248 goto do_it;
249 /* things got complicated... */
250 offset = inode->i_size & (PAGE_CACHE_SIZE-1);
252 /* OK, are we completely out? */
253 err = -EIO;
254 if (page->index >= end_index+1 || !offset)
255 goto out;
256 do_it:
257 lock_kernel();
258 if (NFS_SERVER(inode)->wsize >= PAGE_CACHE_SIZE && !IS_SYNC(inode) &&
259 inode_referenced) {
260 err = nfs_writepage_async(NULL, inode, page, 0, offset);
261 if (err >= 0)
262 err = 0;
263 } else {
264 err = nfs_writepage_sync(NULL, inode, page, 0, offset);
265 if (err == offset)
266 err = 0;
268 unlock_kernel();
269 out:
270 UnlockPage(page);
271 if (inode_referenced)
272 iput(inode);
273 return err;
277 * Check whether the file range we want to write to is locked by
278 * us.
280 static int
281 region_locked(struct inode *inode, struct nfs_page *req)
283 struct file_lock *fl;
284 loff_t rqstart, rqend;
286 /* Don't optimize writes if we don't use NLM */
287 if (NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM)
288 return 0;
290 rqstart = page_offset(req->wb_page) + req->wb_offset;
291 rqend = rqstart + req->wb_bytes;
292 for (fl = inode->i_flock; fl; fl = fl->fl_next) {
293 if (fl->fl_owner == current->files && (fl->fl_flags & FL_POSIX)
294 && fl->fl_type == F_WRLCK
295 && fl->fl_start <= rqstart && rqend <= fl->fl_end) {
296 return 1;
300 return 0;
304 * Insert a write request into an inode
305 * Note: we sort the list in order to be able to optimize nfs_find_request()
306 * & co. for the 'write append' case. For 2.5 we may want to consider
307 * some form of hashing so as to perform well on random writes.
309 static inline void
310 nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
312 struct list_head *pos, *head;
313 unsigned long pg_idx = page_index(req->wb_page);
315 if (!list_empty(&req->wb_hash))
316 return;
317 if (!NFS_WBACK_BUSY(req))
318 printk(KERN_ERR "NFS: unlocked request attempted hashed!\n");
319 head = &inode->u.nfs_i.writeback;
320 if (list_empty(head))
321 igrab(inode);
322 list_for_each_prev(pos, head) {
323 struct nfs_page *entry = nfs_inode_wb_entry(pos);
324 if (page_index(entry->wb_page) < pg_idx)
325 break;
327 inode->u.nfs_i.npages++;
328 list_add(&req->wb_hash, pos);
329 req->wb_count++;
333 * Insert a write request into an inode
335 static inline void
336 nfs_inode_remove_request(struct nfs_page *req)
338 struct inode *inode;
339 spin_lock(&nfs_wreq_lock);
340 if (list_empty(&req->wb_hash)) {
341 spin_unlock(&nfs_wreq_lock);
342 return;
344 if (!NFS_WBACK_BUSY(req))
345 printk(KERN_ERR "NFS: unlocked request attempted unhashed!\n");
346 inode = req->wb_inode;
347 list_del(&req->wb_hash);
348 INIT_LIST_HEAD(&req->wb_hash);
349 inode->u.nfs_i.npages--;
350 if ((inode->u.nfs_i.npages == 0) != list_empty(&inode->u.nfs_i.writeback))
351 printk(KERN_ERR "NFS: desynchronized value of nfs_i.npages.\n");
352 if (list_empty(&inode->u.nfs_i.writeback)) {
353 spin_unlock(&nfs_wreq_lock);
354 iput(inode);
355 } else
356 spin_unlock(&nfs_wreq_lock);
357 nfs_clear_request(req);
358 nfs_release_request(req);
362 * Find a request
364 static inline struct nfs_page *
365 _nfs_find_request(struct inode *inode, struct page *page)
367 struct list_head *head, *pos;
368 unsigned long pg_idx = page_index(page);
370 head = &inode->u.nfs_i.writeback;
371 list_for_each_prev(pos, head) {
372 struct nfs_page *req = nfs_inode_wb_entry(pos);
373 unsigned long found_idx = page_index(req->wb_page);
375 if (pg_idx < found_idx)
376 continue;
377 if (pg_idx != found_idx)
378 break;
379 req->wb_count++;
380 return req;
382 return NULL;
385 static struct nfs_page *
386 nfs_find_request(struct inode *inode, struct page *page)
388 struct nfs_page *req;
390 spin_lock(&nfs_wreq_lock);
391 req = _nfs_find_request(inode, page);
392 spin_unlock(&nfs_wreq_lock);
393 return req;
397 * Add a request to the inode's dirty list.
399 static inline void
400 nfs_mark_request_dirty(struct nfs_page *req)
402 struct inode *inode = req->wb_inode;
404 spin_lock(&nfs_wreq_lock);
405 nfs_list_add_request(req, &inode->u.nfs_i.dirty);
406 inode->u.nfs_i.ndirty++;
407 __nfs_del_lru(req);
408 __nfs_add_lru(&NFS_SERVER(inode)->lru_dirty, req);
409 spin_unlock(&nfs_wreq_lock);
410 mark_inode_dirty(inode);
414 * Check if a request is dirty
416 static inline int
417 nfs_dirty_request(struct nfs_page *req)
419 struct inode *inode = req->wb_inode;
420 return !list_empty(&req->wb_list) && req->wb_list_head == &inode->u.nfs_i.dirty;
423 #ifdef CONFIG_NFS_V3
425 * Add a request to the inode's commit list.
427 static inline void
428 nfs_mark_request_commit(struct nfs_page *req)
430 struct inode *inode = req->wb_inode;
432 spin_lock(&nfs_wreq_lock);
433 nfs_list_add_request(req, &inode->u.nfs_i.commit);
434 inode->u.nfs_i.ncommit++;
435 __nfs_del_lru(req);
436 __nfs_add_lru(&NFS_SERVER(inode)->lru_commit, req);
437 spin_unlock(&nfs_wreq_lock);
438 mark_inode_dirty(inode);
440 #endif
443 * Wait for a request to complete.
445 * Interruptible by signals only if mounted with intr flag.
447 static int
448 nfs_wait_on_requests(struct inode *inode, unsigned long idx_start, unsigned int npages)
450 struct list_head *p, *head;
451 unsigned long idx_end;
452 unsigned int res = 0;
453 int error;
455 if (npages == 0)
456 idx_end = ~0;
457 else
458 idx_end = idx_start + npages - 1;
460 head = &inode->u.nfs_i.writeback;
461 restart:
462 spin_lock(&nfs_wreq_lock);
463 list_for_each_prev(p, head) {
464 unsigned long pg_idx;
465 struct nfs_page *req = nfs_inode_wb_entry(p);
467 pg_idx = page_index(req->wb_page);
468 if (pg_idx < idx_start)
469 break;
470 if (pg_idx > idx_end)
471 continue;
473 if (!NFS_WBACK_BUSY(req))
474 continue;
475 req->wb_count++;
476 spin_unlock(&nfs_wreq_lock);
477 error = nfs_wait_on_request(req);
478 nfs_release_request(req);
479 if (error < 0)
480 return error;
481 res++;
482 goto restart;
484 spin_unlock(&nfs_wreq_lock);
485 return res;
489 * nfs_scan_lru_dirty_timeout - Scan LRU list for timed out dirty requests
490 * @server: NFS superblock data
491 * @dst: destination list
493 * Moves a maximum of 'wpages' requests from the NFS dirty page LRU list.
494 * The elements are checked to ensure that they form a contiguous set
495 * of pages, and that they originated from the same file.
498 nfs_scan_lru_dirty_timeout(struct nfs_server *server, struct list_head *dst)
500 struct inode *inode;
501 int npages;
503 npages = nfs_scan_lru_timeout(&server->lru_dirty, dst, server->wpages);
504 if (npages) {
505 inode = nfs_list_entry(dst->next)->wb_inode;
506 inode->u.nfs_i.ndirty -= npages;
508 return npages;
512 * nfs_scan_lru_dirty - Scan LRU list for dirty requests
513 * @server: NFS superblock data
514 * @dst: destination list
516 * Moves a maximum of 'wpages' requests from the NFS dirty page LRU list.
517 * The elements are checked to ensure that they form a contiguous set
518 * of pages, and that they originated from the same file.
521 nfs_scan_lru_dirty(struct nfs_server *server, struct list_head *dst)
523 struct inode *inode;
524 int npages;
526 npages = nfs_scan_lru(&server->lru_dirty, dst, server->wpages);
527 if (npages) {
528 inode = nfs_list_entry(dst->next)->wb_inode;
529 inode->u.nfs_i.ndirty -= npages;
531 return npages;
535 * nfs_scan_dirty - Scan an inode for dirty requests
536 * @inode: NFS inode to scan
537 * @dst: destination list
538 * @idx_start: lower bound of page->index to scan.
539 * @npages: idx_start + npages sets the upper bound to scan.
541 * Moves requests from the inode's dirty page list.
542 * The requests are *not* checked to ensure that they form a contiguous set.
544 static int
545 nfs_scan_dirty(struct inode *inode, struct list_head *dst, unsigned long idx_start, unsigned int npages)
547 int res;
548 res = nfs_scan_list(&inode->u.nfs_i.dirty, dst, idx_start, npages);
549 inode->u.nfs_i.ndirty -= res;
550 if ((inode->u.nfs_i.ndirty == 0) != list_empty(&inode->u.nfs_i.dirty))
551 printk(KERN_ERR "NFS: desynchronized value of nfs_i.ndirty.\n");
552 return res;
555 #ifdef CONFIG_NFS_V3
557 * nfs_scan_lru_commit_timeout - Scan LRU list for timed out commit requests
558 * @server: NFS superblock data
559 * @dst: destination list
561 * Finds the first a timed out request in the NFS commit LRU list and moves it
562 * to the list dst. If such an element is found, we move all other commit
563 * requests that apply to the same inode.
564 * The assumption is that doing everything in a single commit-to-disk is
565 * the cheaper alternative.
568 nfs_scan_lru_commit_timeout(struct nfs_server *server, struct list_head *dst)
570 struct inode *inode;
571 int npages;
573 npages = nfs_scan_lru_timeout(&server->lru_commit, dst, 1);
574 if (npages) {
575 inode = nfs_list_entry(dst->next)->wb_inode;
576 npages += nfs_scan_list(&inode->u.nfs_i.commit, dst, 0, 0);
577 inode->u.nfs_i.ncommit -= npages;
579 return npages;
584 * nfs_scan_lru_commit_timeout - Scan LRU list for timed out commit requests
585 * @server: NFS superblock data
586 * @dst: destination list
588 * Finds the first request in the NFS commit LRU list and moves it
589 * to the list dst. If such an element is found, we move all other commit
590 * requests that apply to the same inode.
591 * The assumption is that doing everything in a single commit-to-disk is
592 * the cheaper alternative.
595 nfs_scan_lru_commit(struct nfs_server *server, struct list_head *dst)
597 struct inode *inode;
598 int npages;
600 npages = nfs_scan_lru(&server->lru_commit, dst, 1);
601 if (npages) {
602 inode = nfs_list_entry(dst->next)->wb_inode;
603 npages += nfs_scan_list(&inode->u.nfs_i.commit, dst, 0, 0);
604 inode->u.nfs_i.ncommit -= npages;
606 return npages;
610 * nfs_scan_commit - Scan an inode for commit requests
611 * @inode: NFS inode to scan
612 * @dst: destination list
613 * @idx_start: lower bound of page->index to scan.
614 * @npages: idx_start + npages sets the upper bound to scan.
616 * Moves requests from the inode's 'commit' request list.
617 * The requests are *not* checked to ensure that they form a contiguous set.
619 static int
620 nfs_scan_commit(struct inode *inode, struct list_head *dst, unsigned long idx_start, unsigned int npages)
622 int res;
623 res = nfs_scan_list(&inode->u.nfs_i.commit, dst, idx_start, npages);
624 inode->u.nfs_i.ncommit -= res;
625 if ((inode->u.nfs_i.ncommit == 0) != list_empty(&inode->u.nfs_i.commit))
626 printk(KERN_ERR "NFS: desynchronized value of nfs_i.ncommit.\n");
627 return res;
629 #endif
633 * Try to update any existing write request, or create one if there is none.
634 * In order to match, the request's credentials must match those of
635 * the calling process.
637 * Note: Should always be called with the Page Lock held!
639 static struct nfs_page *
640 nfs_update_request(struct file* file, struct inode *inode, struct page *page,
641 unsigned int offset, unsigned int bytes)
643 struct nfs_page *req, *new = NULL;
644 unsigned long rqend, end;
646 end = offset + bytes;
648 for (;;) {
649 /* Loop over all inode entries and see if we find
650 * A request for the page we wish to update
652 spin_lock(&nfs_wreq_lock);
653 req = _nfs_find_request(inode, page);
654 if (req) {
655 if (!nfs_lock_request_dontget(req)) {
656 int error;
657 spin_unlock(&nfs_wreq_lock);
658 error = nfs_wait_on_request(req);
659 nfs_release_request(req);
660 if (error < 0) {
661 if (new)
662 nfs_release_request(new);
663 return ERR_PTR(error);
665 continue;
667 spin_unlock(&nfs_wreq_lock);
668 if (new)
669 nfs_release_request(new);
670 break;
673 if (new) {
674 nfs_lock_request_dontget(new);
675 nfs_inode_add_request(inode, new);
676 spin_unlock(&nfs_wreq_lock);
677 nfs_mark_request_dirty(new);
678 return new;
680 spin_unlock(&nfs_wreq_lock);
682 new = nfs_create_request(nfs_file_cred(file), inode, page, offset, bytes);
683 if (IS_ERR(new))
684 return new;
685 if (file) {
686 new->wb_file = file;
687 get_file(file);
689 /* If the region is locked, adjust the timeout */
690 if (region_locked(inode, new))
691 new->wb_timeout = jiffies + NFS_WRITEBACK_LOCKDELAY;
692 else
693 new->wb_timeout = jiffies + NFS_WRITEBACK_DELAY;
696 /* We have a request for our page.
697 * If the creds don't match, or the
698 * page addresses don't match,
699 * tell the caller to wait on the conflicting
700 * request.
702 rqend = req->wb_offset + req->wb_bytes;
703 if (req->wb_file != file
704 || req->wb_page != page
705 || !nfs_dirty_request(req)
706 || offset > rqend || end < req->wb_offset) {
707 nfs_unlock_request(req);
708 return ERR_PTR(-EBUSY);
711 /* Okay, the request matches. Update the region */
712 if (offset < req->wb_offset) {
713 req->wb_offset = offset;
714 req->wb_bytes = rqend - req->wb_offset;
717 if (end > rqend)
718 req->wb_bytes = end - req->wb_offset;
720 return req;
724 * This is the strategy routine for NFS.
725 * It is called by nfs_updatepage whenever the user wrote up to the end
726 * of a page.
728 * We always try to submit a set of requests in parallel so that the
729 * server's write code can gather writes. This is mainly for the benefit
730 * of NFSv2.
732 * We never submit more requests than we think the remote can handle.
733 * For UDP sockets, we make sure we don't exceed the congestion window;
734 * for TCP, we limit the number of requests to 8.
736 * NFS_STRATEGY_PAGES gives the minimum number of requests for NFSv2 that
737 * should be sent out in one go. This is for the benefit of NFSv2 servers
738 * that perform write gathering.
740 * FIXME: Different servers may have different sweet spots.
741 * Record the average congestion window in server struct?
743 #define NFS_STRATEGY_PAGES 8
744 static void
745 nfs_strategy(struct inode *inode)
747 unsigned int dirty, wpages;
749 dirty = inode->u.nfs_i.ndirty;
750 wpages = NFS_SERVER(inode)->wpages;
751 #ifdef CONFIG_NFS_V3
752 if (NFS_PROTO(inode)->version == 2) {
753 if (dirty >= NFS_STRATEGY_PAGES * wpages)
754 nfs_flush_file(inode, 0, 0, 0);
755 } else if (dirty >= wpages)
756 nfs_flush_file(inode, 0, 0, 0);
757 #else
758 if (dirty >= NFS_STRATEGY_PAGES * wpages)
759 nfs_flush_file(inode, 0, 0, 0);
760 #endif
764 nfs_flush_incompatible(struct file *file, struct page *page)
766 struct rpc_cred *cred = nfs_file_cred(file);
767 struct inode *inode = page->mapping->host;
768 struct nfs_page *req;
769 int status = 0;
771 * Look for a request corresponding to this page. If there
772 * is one, and it belongs to another file, we flush it out
773 * before we try to copy anything into the page. Do this
774 * due to the lack of an ACCESS-type call in NFSv2.
775 * Also do the same if we find a request from an existing
776 * dropped page.
778 req = nfs_find_request(inode,page);
779 if (req) {
780 if (req->wb_file != file || req->wb_cred != cred || req->wb_page != page)
781 status = nfs_wb_page(inode, page);
782 nfs_release_request(req);
784 return (status < 0) ? status : 0;
788 * Update and possibly write a cached page of an NFS file.
790 * XXX: Keep an eye on generic_file_read to make sure it doesn't do bad
791 * things with a page scheduled for an RPC call (e.g. invalidate it).
794 nfs_updatepage(struct file *file, struct page *page, unsigned int offset, unsigned int count)
796 struct dentry *dentry = file->f_dentry;
797 struct inode *inode = page->mapping->host;
798 struct nfs_page *req;
799 loff_t end;
800 int status = 0;
802 dprintk("NFS: nfs_updatepage(%s/%s %d@%Ld)\n",
803 dentry->d_parent->d_name.name, dentry->d_name.name,
804 count, (long long)(page_offset(page) +offset));
807 * If wsize is smaller than page size, update and write
808 * page synchronously.
810 if (NFS_SERVER(inode)->wsize < PAGE_CACHE_SIZE || IS_SYNC(inode)) {
811 status = nfs_writepage_sync(file, inode, page, offset, count);
812 if (status > 0) {
813 if (offset == 0 && status == PAGE_CACHE_SIZE)
814 SetPageUptodate(page);
815 return 0;
817 return status;
821 * Try to find an NFS request corresponding to this page
822 * and update it.
823 * If the existing request cannot be updated, we must flush
824 * it out now.
826 do {
827 req = nfs_update_request(file, inode, page, offset, count);
828 status = (IS_ERR(req)) ? PTR_ERR(req) : 0;
829 if (status != -EBUSY)
830 break;
831 /* Request could not be updated. Flush it out and try again */
832 status = nfs_wb_page(inode, page);
833 } while (status >= 0);
834 if (status < 0)
835 goto done;
837 status = 0;
838 end = ((loff_t)page->index<<PAGE_CACHE_SHIFT) + (loff_t)(offset + count);
839 if (inode->i_size < end)
840 inode->i_size = end;
842 /* If we wrote past the end of the page.
843 * Call the strategy routine so it can send out a bunch
844 * of requests.
846 if (req->wb_offset == 0 && req->wb_bytes == PAGE_CACHE_SIZE) {
847 SetPageUptodate(page);
848 nfs_unlock_request(req);
849 nfs_strategy(inode);
850 } else
851 nfs_unlock_request(req);
852 done:
853 dprintk("NFS: nfs_updatepage returns %d (isize %Ld)\n",
854 status, (long long)inode->i_size);
855 if (status < 0)
856 ClearPageUptodate(page);
857 return status;
861 * Set up the argument/result storage required for the RPC call.
863 static void
864 nfs_write_rpcsetup(struct list_head *head, struct nfs_write_data *data)
866 struct nfs_page *req;
867 struct page **pages;
868 unsigned int count;
870 /* Set up the RPC argument and reply structs
871 * NB: take care not to mess about with data->commit et al. */
873 pages = data->args.pages;
874 count = 0;
875 while (!list_empty(head)) {
876 struct nfs_page *req = nfs_list_entry(head->next);
877 nfs_list_remove_request(req);
878 nfs_list_add_request(req, &data->pages);
879 *pages++ = req->wb_page;
880 count += req->wb_bytes;
882 req = nfs_list_entry(data->pages.next);
883 data->inode = req->wb_inode;
884 data->cred = req->wb_cred;
885 data->args.fh = NFS_FH(req->wb_inode);
886 data->args.offset = page_offset(req->wb_page) + req->wb_offset;
887 data->args.pgbase = req->wb_offset;
888 data->args.count = count;
889 data->res.fattr = &data->fattr;
890 data->res.count = count;
891 data->res.verf = &data->verf;
896 * Create an RPC task for the given write request and kick it.
897 * The page must have been locked by the caller.
899 * It may happen that the page we're passed is not marked dirty.
900 * This is the case if nfs_updatepage detects a conflicting request
901 * that has been written but not committed.
903 static int
904 nfs_flush_one(struct list_head *head, struct inode *inode, int how)
906 struct rpc_clnt *clnt = NFS_CLIENT(inode);
907 struct nfs_write_data *data;
908 struct rpc_task *task;
909 struct rpc_message msg;
910 int flags,
911 nfsvers = NFS_PROTO(inode)->version,
912 async = !(how & FLUSH_SYNC),
913 stable = (how & FLUSH_STABLE);
914 sigset_t oldset;
917 data = nfs_writedata_alloc();
918 if (!data)
919 goto out_bad;
920 task = &data->task;
922 /* Set the initial flags for the task. */
923 flags = (async) ? RPC_TASK_ASYNC : 0;
925 /* Set up the argument struct */
926 nfs_write_rpcsetup(head, data);
927 if (nfsvers < 3)
928 data->args.stable = NFS_FILE_SYNC;
929 else if (stable) {
930 if (!inode->u.nfs_i.ncommit)
931 data->args.stable = NFS_FILE_SYNC;
932 else
933 data->args.stable = NFS_DATA_SYNC;
934 } else
935 data->args.stable = NFS_UNSTABLE;
937 /* Finalize the task. */
938 rpc_init_task(task, clnt, nfs_writeback_done, flags);
939 task->tk_calldata = data;
940 /* Release requests */
941 task->tk_release = nfs_writedata_release;
943 #ifdef CONFIG_NFS_V3
944 msg.rpc_proc = (nfsvers == 3) ? NFS3PROC_WRITE : NFSPROC_WRITE;
945 #else
946 msg.rpc_proc = NFSPROC_WRITE;
947 #endif
948 msg.rpc_argp = &data->args;
949 msg.rpc_resp = &data->res;
950 msg.rpc_cred = data->cred;
952 dprintk("NFS: %4d initiated write call (req %x/%Ld count %u)\n",
953 task->tk_pid,
954 inode->i_dev,
955 (long long)NFS_FILEID(inode),
956 data->args.count);
958 rpc_clnt_sigmask(clnt, &oldset);
959 rpc_call_setup(task, &msg, 0);
960 lock_kernel();
961 rpc_execute(task);
962 unlock_kernel();
963 rpc_clnt_sigunmask(clnt, &oldset);
964 return 0;
965 out_bad:
966 while (!list_empty(head)) {
967 struct nfs_page *req = nfs_list_entry(head->next);
968 nfs_list_remove_request(req);
969 nfs_mark_request_dirty(req);
970 nfs_unlock_request(req);
972 return -ENOMEM;
976 nfs_flush_list(struct list_head *head, int wpages, int how)
978 LIST_HEAD(one_request);
979 struct nfs_page *req;
980 int error = 0;
981 unsigned int pages = 0;
983 while (!list_empty(head)) {
984 pages += nfs_coalesce_requests(head, &one_request, wpages);
985 req = nfs_list_entry(one_request.next);
986 error = nfs_flush_one(&one_request, req->wb_inode, how);
987 if (error < 0)
988 break;
990 if (error >= 0)
991 return pages;
993 while (!list_empty(head)) {
994 req = nfs_list_entry(head->next);
995 nfs_list_remove_request(req);
996 nfs_mark_request_dirty(req);
997 nfs_unlock_request(req);
999 return error;
1004 * This function is called when the WRITE call is complete.
1006 static void
1007 nfs_writeback_done(struct rpc_task *task)
1009 struct nfs_write_data *data = (struct nfs_write_data *) task->tk_calldata;
1010 struct nfs_writeargs *argp = &data->args;
1011 struct nfs_writeres *resp = &data->res;
1012 struct inode *inode = data->inode;
1013 struct nfs_page *req;
1014 struct page *page;
1016 dprintk("NFS: %4d nfs_writeback_done (status %d)\n",
1017 task->tk_pid, task->tk_status);
1019 if (nfs_async_handle_jukebox(task))
1020 return;
1022 /* We can't handle that yet but we check for it nevertheless */
1023 if (resp->count < argp->count && task->tk_status >= 0) {
1024 static unsigned long complain;
1025 if (time_before(complain, jiffies)) {
1026 printk(KERN_WARNING
1027 "NFS: Server wrote less than requested.\n");
1028 complain = jiffies + 300 * HZ;
1030 /* Can't do anything about it right now except throw
1031 * an error. */
1032 task->tk_status = -EIO;
1034 #ifdef CONFIG_NFS_V3
1035 if (resp->verf->committed < argp->stable && task->tk_status >= 0) {
1036 /* We tried a write call, but the server did not
1037 * commit data to stable storage even though we
1038 * requested it.
1039 * Note: There is a known bug in Tru64 < 5.0 in which
1040 * the server reports NFS_DATA_SYNC, but performs
1041 * NFS_FILE_SYNC. We therefore implement this checking
1042 * as a dprintk() in order to avoid filling syslog.
1044 static unsigned long complain;
1046 if (time_before(complain, jiffies)) {
1047 dprintk("NFS: faulty NFSv3 server %s:"
1048 " (committed = %d) != (stable = %d)\n",
1049 NFS_SERVER(inode)->hostname,
1050 resp->verf->committed, argp->stable);
1051 complain = jiffies + 300 * HZ;
1054 #endif
1057 * Update attributes as result of writeback.
1058 * FIXME: There is an inherent race with invalidate_inode_pages and
1059 * writebacks since the page->count is kept > 1 for as long
1060 * as the page has a write request pending.
1062 nfs_write_attributes(inode, resp->fattr);
1063 while (!list_empty(&data->pages)) {
1064 req = nfs_list_entry(data->pages.next);
1065 nfs_list_remove_request(req);
1066 page = req->wb_page;
1068 dprintk("NFS: write (%x/%Ld %d@%Ld)",
1069 req->wb_inode->i_dev,
1070 (long long)NFS_FILEID(req->wb_inode),
1071 req->wb_bytes,
1072 (long long)(page_offset(page) + req->wb_offset));
1074 if (task->tk_status < 0) {
1075 ClearPageUptodate(page);
1076 SetPageError(page);
1077 if (req->wb_file)
1078 req->wb_file->f_error = task->tk_status;
1079 nfs_inode_remove_request(req);
1080 dprintk(", error = %d\n", task->tk_status);
1081 goto next;
1084 #ifdef CONFIG_NFS_V3
1085 if (argp->stable != NFS_UNSTABLE || resp->verf->committed == NFS_FILE_SYNC) {
1086 nfs_inode_remove_request(req);
1087 dprintk(" OK\n");
1088 goto next;
1090 memcpy(&req->wb_verf, resp->verf, sizeof(req->wb_verf));
1091 req->wb_timeout = jiffies + NFS_COMMIT_DELAY;
1092 nfs_mark_request_commit(req);
1093 dprintk(" marked for commit\n");
1094 #else
1095 nfs_inode_remove_request(req);
1096 #endif
1097 next:
1098 nfs_unlock_request(req);
1103 #ifdef CONFIG_NFS_V3
1105 * Set up the argument/result storage required for the RPC call.
1107 static void
1108 nfs_commit_rpcsetup(struct list_head *head, struct nfs_write_data *data)
1110 struct nfs_page *first, *last;
1111 struct inode *inode;
1112 loff_t start, end, len;
1114 /* Set up the RPC argument and reply structs
1115 * NB: take care not to mess about with data->commit et al. */
1117 list_splice(head, &data->pages);
1118 INIT_LIST_HEAD(head);
1119 first = nfs_list_entry(data->pages.next);
1120 last = nfs_list_entry(data->pages.prev);
1121 inode = first->wb_inode;
1124 * Determine the offset range of requests in the COMMIT call.
1125 * We rely on the fact that data->pages is an ordered list...
1127 start = page_offset(first->wb_page) + first->wb_offset;
1128 end = page_offset(last->wb_page) + (last->wb_offset + last->wb_bytes);
1129 len = end - start;
1130 /* If 'len' is not a 32-bit quantity, pass '0' in the COMMIT call */
1131 if (end >= inode->i_size || len < 0 || len > (~((u32)0) >> 1))
1132 len = 0;
1134 data->inode = inode;
1135 data->cred = first->wb_cred;
1136 data->args.fh = NFS_FH(inode);
1137 data->args.offset = start;
1138 data->res.count = data->args.count = (u32)len;
1139 data->res.fattr = &data->fattr;
1140 data->res.verf = &data->verf;
1144 * Commit dirty pages
1147 nfs_commit_list(struct list_head *head, int how)
1149 struct rpc_message msg;
1150 struct rpc_clnt *clnt;
1151 struct nfs_write_data *data;
1152 struct rpc_task *task;
1153 struct nfs_page *req;
1154 int flags,
1155 async = !(how & FLUSH_SYNC);
1156 sigset_t oldset;
1158 data = nfs_writedata_alloc();
1160 if (!data)
1161 goto out_bad;
1162 task = &data->task;
1164 flags = (async) ? RPC_TASK_ASYNC : 0;
1166 /* Set up the argument struct */
1167 nfs_commit_rpcsetup(head, data);
1168 req = nfs_list_entry(data->pages.next);
1169 clnt = NFS_CLIENT(req->wb_inode);
1171 rpc_init_task(task, clnt, nfs_commit_done, flags);
1172 task->tk_calldata = data;
1173 /* Release requests */
1174 task->tk_release = nfs_writedata_release;
1176 msg.rpc_proc = NFS3PROC_COMMIT;
1177 msg.rpc_argp = &data->args;
1178 msg.rpc_resp = &data->res;
1179 msg.rpc_cred = data->cred;
1181 dprintk("NFS: %4d initiated commit call\n", task->tk_pid);
1182 rpc_clnt_sigmask(clnt, &oldset);
1183 rpc_call_setup(task, &msg, 0);
1184 lock_kernel();
1185 rpc_execute(task);
1186 unlock_kernel();
1187 rpc_clnt_sigunmask(clnt, &oldset);
1188 return 0;
1189 out_bad:
1190 while (!list_empty(head)) {
1191 req = nfs_list_entry(head->next);
1192 nfs_list_remove_request(req);
1193 nfs_mark_request_commit(req);
1194 nfs_unlock_request(req);
1196 return -ENOMEM;
1200 * COMMIT call returned
1202 static void
1203 nfs_commit_done(struct rpc_task *task)
1205 struct nfs_write_data *data = (struct nfs_write_data *)task->tk_calldata;
1206 struct nfs_writeres *resp = &data->res;
1207 struct nfs_page *req;
1208 struct inode *inode = data->inode;
1210 dprintk("NFS: %4d nfs_commit_done (status %d)\n",
1211 task->tk_pid, task->tk_status);
1213 if (nfs_async_handle_jukebox(task))
1214 return;
1216 nfs_write_attributes(inode, resp->fattr);
1217 while (!list_empty(&data->pages)) {
1218 req = nfs_list_entry(data->pages.next);
1219 nfs_list_remove_request(req);
1221 dprintk("NFS: commit (%x/%Ld %d@%Ld)",
1222 req->wb_inode->i_dev,
1223 (long long)NFS_FILEID(req->wb_inode),
1224 req->wb_bytes,
1225 (long long)(page_offset(req->wb_page) + req->wb_offset));
1226 if (task->tk_status < 0) {
1227 if (req->wb_file)
1228 req->wb_file->f_error = task->tk_status;
1229 nfs_inode_remove_request(req);
1230 dprintk(", error = %d\n", task->tk_status);
1231 goto next;
1234 /* Okay, COMMIT succeeded, apparently. Check the verifier
1235 * returned by the server against all stored verfs. */
1236 if (!memcmp(req->wb_verf.verifier, data->verf.verifier, sizeof(data->verf.verifier))) {
1237 /* We have a match */
1238 nfs_inode_remove_request(req);
1239 dprintk(" OK\n");
1240 goto next;
1242 /* We have a mismatch. Write the page again */
1243 dprintk(" mismatch\n");
1244 nfs_mark_request_dirty(req);
1245 next:
1246 nfs_unlock_request(req);
1249 #endif
1251 int nfs_flush_file(struct inode *inode, unsigned long idx_start,
1252 unsigned int npages, int how)
1254 LIST_HEAD(head);
1255 int res,
1256 error = 0;
1258 spin_lock(&nfs_wreq_lock);
1259 res = nfs_scan_dirty(inode, &head, idx_start, npages);
1260 spin_unlock(&nfs_wreq_lock);
1261 if (res)
1262 error = nfs_flush_list(&head, NFS_SERVER(inode)->wpages, how);
1263 if (error < 0)
1264 return error;
1265 return res;
1268 #ifdef CONFIG_NFS_V3
1269 int nfs_commit_file(struct inode *inode, int how)
1271 LIST_HEAD(head);
1272 int res,
1273 error = 0;
1275 spin_lock(&nfs_wreq_lock);
1276 res = nfs_scan_commit(inode, &head, 0, 0);
1277 spin_unlock(&nfs_wreq_lock);
1278 if (res)
1279 error = nfs_commit_list(&head, how);
1280 if (error < 0)
1281 return error;
1282 return res;
1284 #endif
1286 int nfs_sync_file(struct inode *inode, unsigned long idx_start,
1287 unsigned int npages, int how)
1289 int error,
1290 wait;
1292 wait = how & FLUSH_WAIT;
1293 how &= ~FLUSH_WAIT;
1295 do {
1296 error = 0;
1297 if (wait)
1298 error = nfs_wait_on_requests(inode, idx_start, npages);
1299 if (error == 0)
1300 error = nfs_flush_file(inode, idx_start, npages, how);
1301 #ifdef CONFIG_NFS_V3
1302 if (error == 0)
1303 error = nfs_commit_file(inode, how);
1304 #endif
1305 } while (error > 0);
1306 return error;
1309 int nfs_init_writepagecache(void)
1311 nfs_wdata_cachep = kmem_cache_create("nfs_write_data",
1312 sizeof(struct nfs_write_data),
1313 0, SLAB_HWCACHE_ALIGN,
1314 NULL, NULL);
1315 if (nfs_wdata_cachep == NULL)
1316 return -ENOMEM;
1318 return 0;
1321 void nfs_destroy_writepagecache(void)
1323 if (kmem_cache_destroy(nfs_wdata_cachep))
1324 printk(KERN_INFO "nfs_write_data: not all structures were freed\n");