/*
 * Writing file data over NFS.
 *
 * We do it like this: When a (user) process wishes to write data to an
 * NFS file, a write request is allocated that contains the RPC task data
 * plus some info on the page to be written, and added to the inode's
 * write chain. If the process writes past the end of the page, an async
 * RPC call to write the page is scheduled immediately; otherwise, the call
 * is delayed for a few seconds.
 *
 * Just like readahead, no async I/O is performed if wsize < PAGE_SIZE.
 *
 * Write requests are kept on the inode's writeback list. Each entry in
 * that list references the page (portion) to be written. When the
 * cache timeout has expired, the RPC task is woken up, and tries to
 * lock the page. As soon as it manages to do so, the request is moved
 * from the writeback list to the writelock list.
 *
 * Note: we must make sure never to confuse the inode passed in the
 * write_page request with the one in page->inode. As far as I understand
 * it, these are different when doing a swap-out.
 *
 * To understand everything that goes on here and in the NFS read code,
 * one should be aware that a page is locked in exactly one of the following
 * cases:
 *
 *  - A write request is in progress.
 *  - A user process is in generic_file_write/nfs_update_page
 *  - A user process is in generic_file_read
 *
 * Also note that because of the way pages are invalidated in
 * nfs_revalidate_inode, the following assertions hold:
 *
 *  - If a page is dirty, there will be no read requests (a page will
 *    not be re-read unless invalidated by nfs_revalidate_inode).
 *  - If the page is not uptodate, there will be no pending write
 *    requests, and no process will be in nfs_update_page.
 *
 * FIXME: Interaction with the vmscan routines is not optimal yet.
 * Either vmscan must be made nfs-savvy, or we need a different page
 * reclaim concept that supports something like FS-independent
 * buffer_heads with a b_ops-> field.
 *
 * Copyright (C) 1996, 1997, Olaf Kirch <okir@monad.swb.de>
 */
49 #include <linux/config.h>
50 #include <linux/types.h>
51 #include <linux/slab.h>
52 #include <linux/swap.h>
53 #include <linux/pagemap.h>
54 #include <linux/file.h>
56 #include <linux/sunrpc/clnt.h>
57 #include <linux/nfs_fs.h>
58 #include <linux/nfs_mount.h>
59 #include <linux/nfs_flushd.h>
60 #include <linux/nfs_page.h>
61 #include <asm/uaccess.h>
62 #include <linux/smp_lock.h>
64 #define NFSDBG_FACILITY NFSDBG_PAGECACHE
69 * This is the struct where the WRITE/COMMIT arguments go.
71 struct nfs_write_data
{
74 struct rpc_cred
*cred
;
75 struct nfs_writeargs args
; /* argument struct */
76 struct nfs_writeres res
; /* result struct */
77 struct nfs_fattr fattr
;
78 struct nfs_writeverf verf
;
79 struct list_head pages
; /* Coalesced requests we wish to flush */
80 struct page
*pagevec
[NFS_WRITE_MAXIOV
];
/*
 * Local function declarations
 */
static struct nfs_page *nfs_update_request(struct file *, struct inode *,
					   struct page *,
					   unsigned int, unsigned int);
static void nfs_strategy(struct inode *inode);
static void nfs_writeback_done(struct rpc_task *);
#ifdef CONFIG_NFS_V3
static void nfs_commit_done(struct rpc_task *);
#endif
/* Hack for future NFS swap support */
#ifndef IS_SWAPFILE
# define IS_SWAPFILE(inode)	(0)
#endif
100 static kmem_cache_t
*nfs_wdata_cachep
;
102 static __inline__
struct nfs_write_data
*nfs_writedata_alloc(void)
104 struct nfs_write_data
*p
;
105 p
= kmem_cache_alloc(nfs_wdata_cachep
, SLAB_NOFS
);
107 memset(p
, 0, sizeof(*p
));
108 INIT_LIST_HEAD(&p
->pages
);
109 p
->args
.pages
= p
->pagevec
;
114 static __inline__
void nfs_writedata_free(struct nfs_write_data
*p
)
116 kmem_cache_free(nfs_wdata_cachep
, p
);
119 static void nfs_writedata_release(struct rpc_task
*task
)
121 struct nfs_write_data
*wdata
= (struct nfs_write_data
*)task
->tk_calldata
;
122 nfs_writedata_free(wdata
);
126 * Write a page synchronously.
127 * Offset is the data offset within the page.
130 nfs_writepage_sync(struct file
*file
, struct inode
*inode
, struct page
*page
,
131 unsigned int offset
, unsigned int count
)
133 struct rpc_cred
*cred
= NULL
;
135 unsigned int wsize
= NFS_SERVER(inode
)->wsize
;
136 int result
, refresh
= 0, written
= 0, flags
;
138 struct nfs_fattr fattr
;
139 struct nfs_writeverf verf
;
143 cred
= get_rpccred(nfs_file_cred(file
));
145 cred
= get_rpccred(NFS_I(inode
)->mm_cred
);
147 dprintk("NFS: nfs_writepage_sync(%x/%Ld %d@%Ld)\n",
148 inode
->i_dev
, (long long)NFS_FILEID(inode
),
149 count
, (long long)(page_offset(page
) + offset
));
151 base
= page_offset(page
) + offset
;
153 flags
= ((IS_SWAPFILE(inode
)) ? NFS_RW_SWAP
: 0) | NFS_RW_SYNC
;
156 if (count
< wsize
&& !IS_SWAPFILE(inode
))
159 result
= NFS_PROTO(inode
)->write(inode
, cred
, &fattr
, flags
,
160 offset
, wsize
, page
, &verf
);
161 nfs_write_attributes(inode
, &fattr
);
164 /* Must mark the page invalid after I/O error */
165 ClearPageUptodate(page
);
169 printk("NFS: short write, wsize=%u, result=%d\n",
178 * If we've extended the file, update the inode
179 * now so we don't invalidate the cache.
181 if (base
> inode
->i_size
)
182 inode
->i_size
= base
;
186 ClearPageError(page
);
192 return written
? written
: result
;
196 nfs_writepage_async(struct file
*file
, struct inode
*inode
, struct page
*page
,
197 unsigned int offset
, unsigned int count
)
199 struct nfs_page
*req
;
203 req
= nfs_update_request(file
, inode
, page
, offset
, count
);
204 status
= (IS_ERR(req
)) ? PTR_ERR(req
) : 0;
208 req
->wb_cred
= get_rpccred(NFS_I(inode
)->mm_cred
);
209 nfs_unlock_request(req
);
211 end
= ((loff_t
)page
->index
<<PAGE_CACHE_SHIFT
) + (loff_t
)(offset
+ count
);
212 if (inode
->i_size
< end
)
220 * Write an mmapped page to the server.
223 nfs_writepage(struct page
*page
)
225 struct inode
*inode
= page
->mapping
->host
;
226 unsigned long end_index
;
227 unsigned offset
= PAGE_CACHE_SIZE
;
228 int inode_referenced
= 0;
232 * Note: We need to ensure that we have a reference to the inode
233 * if we are to do asynchronous writes. If not, waiting
234 * in nfs_wait_on_request() may deadlock with clear_inode().
236 * If igrab() fails here, then it is in any case safe to
237 * call nfs_wb_page(), since there will be no pending writes.
239 if (igrab(inode
) != 0)
240 inode_referenced
= 1;
241 end_index
= inode
->i_size
>> PAGE_CACHE_SHIFT
;
243 /* Ensure we've flushed out any previous writes */
244 nfs_wb_page(inode
,page
);
247 if (page
->index
< end_index
)
249 /* things got complicated... */
250 offset
= inode
->i_size
& (PAGE_CACHE_SIZE
-1);
252 /* OK, are we completely out? */
254 if (page
->index
>= end_index
+1 || !offset
)
258 if (NFS_SERVER(inode
)->wsize
>= PAGE_CACHE_SIZE
&& !IS_SYNC(inode
) &&
260 err
= nfs_writepage_async(NULL
, inode
, page
, 0, offset
);
264 err
= nfs_writepage_sync(NULL
, inode
, page
, 0, offset
);
271 if (inode_referenced
)
277 * Check whether the file range we want to write to is locked by
281 region_locked(struct inode
*inode
, struct nfs_page
*req
)
283 struct file_lock
*fl
;
284 loff_t rqstart
, rqend
;
286 /* Don't optimize writes if we don't use NLM */
287 if (NFS_SERVER(inode
)->flags
& NFS_MOUNT_NONLM
)
290 rqstart
= page_offset(req
->wb_page
) + req
->wb_offset
;
291 rqend
= rqstart
+ req
->wb_bytes
;
292 for (fl
= inode
->i_flock
; fl
; fl
= fl
->fl_next
) {
293 if (fl
->fl_owner
== current
->files
&& (fl
->fl_flags
& FL_POSIX
)
294 && fl
->fl_type
== F_WRLCK
295 && fl
->fl_start
<= rqstart
&& rqend
<= fl
->fl_end
) {
304 * Insert a write request into an inode
305 * Note: we sort the list in order to be able to optimize nfs_find_request()
306 * & co. for the 'write append' case. For 2.5 we may want to consider
307 * some form of hashing so as to perform well on random writes.
310 nfs_inode_add_request(struct inode
*inode
, struct nfs_page
*req
)
312 struct list_head
*pos
, *head
;
313 unsigned long pg_idx
= page_index(req
->wb_page
);
315 if (!list_empty(&req
->wb_hash
))
317 if (!NFS_WBACK_BUSY(req
))
318 printk(KERN_ERR
"NFS: unlocked request attempted hashed!\n");
319 head
= &inode
->u
.nfs_i
.writeback
;
320 if (list_empty(head
))
322 list_for_each_prev(pos
, head
) {
323 struct nfs_page
*entry
= nfs_inode_wb_entry(pos
);
324 if (page_index(entry
->wb_page
) < pg_idx
)
327 inode
->u
.nfs_i
.npages
++;
328 list_add(&req
->wb_hash
, pos
);
333 * Insert a write request into an inode
336 nfs_inode_remove_request(struct nfs_page
*req
)
339 spin_lock(&nfs_wreq_lock
);
340 if (list_empty(&req
->wb_hash
)) {
341 spin_unlock(&nfs_wreq_lock
);
344 if (!NFS_WBACK_BUSY(req
))
345 printk(KERN_ERR
"NFS: unlocked request attempted unhashed!\n");
346 inode
= req
->wb_inode
;
347 list_del(&req
->wb_hash
);
348 INIT_LIST_HEAD(&req
->wb_hash
);
349 inode
->u
.nfs_i
.npages
--;
350 if ((inode
->u
.nfs_i
.npages
== 0) != list_empty(&inode
->u
.nfs_i
.writeback
))
351 printk(KERN_ERR
"NFS: desynchronized value of nfs_i.npages.\n");
352 if (list_empty(&inode
->u
.nfs_i
.writeback
)) {
353 spin_unlock(&nfs_wreq_lock
);
356 spin_unlock(&nfs_wreq_lock
);
357 nfs_clear_request(req
);
358 nfs_release_request(req
);
364 static inline struct nfs_page
*
365 _nfs_find_request(struct inode
*inode
, struct page
*page
)
367 struct list_head
*head
, *pos
;
368 unsigned long pg_idx
= page_index(page
);
370 head
= &inode
->u
.nfs_i
.writeback
;
371 list_for_each_prev(pos
, head
) {
372 struct nfs_page
*req
= nfs_inode_wb_entry(pos
);
373 unsigned long found_idx
= page_index(req
->wb_page
);
375 if (pg_idx
< found_idx
)
377 if (pg_idx
!= found_idx
)
385 static struct nfs_page
*
386 nfs_find_request(struct inode
*inode
, struct page
*page
)
388 struct nfs_page
*req
;
390 spin_lock(&nfs_wreq_lock
);
391 req
= _nfs_find_request(inode
, page
);
392 spin_unlock(&nfs_wreq_lock
);
397 * Add a request to the inode's dirty list.
400 nfs_mark_request_dirty(struct nfs_page
*req
)
402 struct inode
*inode
= req
->wb_inode
;
404 spin_lock(&nfs_wreq_lock
);
405 nfs_list_add_request(req
, &inode
->u
.nfs_i
.dirty
);
406 inode
->u
.nfs_i
.ndirty
++;
408 __nfs_add_lru(&NFS_SERVER(inode
)->lru_dirty
, req
);
409 spin_unlock(&nfs_wreq_lock
);
410 mark_inode_dirty(inode
);
414 * Check if a request is dirty
417 nfs_dirty_request(struct nfs_page
*req
)
419 struct inode
*inode
= req
->wb_inode
;
420 return !list_empty(&req
->wb_list
) && req
->wb_list_head
== &inode
->u
.nfs_i
.dirty
;
425 * Add a request to the inode's commit list.
428 nfs_mark_request_commit(struct nfs_page
*req
)
430 struct inode
*inode
= req
->wb_inode
;
432 spin_lock(&nfs_wreq_lock
);
433 nfs_list_add_request(req
, &inode
->u
.nfs_i
.commit
);
434 inode
->u
.nfs_i
.ncommit
++;
436 __nfs_add_lru(&NFS_SERVER(inode
)->lru_commit
, req
);
437 spin_unlock(&nfs_wreq_lock
);
438 mark_inode_dirty(inode
);
443 * Wait for a request to complete.
445 * Interruptible by signals only if mounted with intr flag.
448 nfs_wait_on_requests(struct inode
*inode
, unsigned long idx_start
, unsigned int npages
)
450 struct list_head
*p
, *head
;
451 unsigned long idx_end
;
452 unsigned int res
= 0;
458 idx_end
= idx_start
+ npages
- 1;
460 head
= &inode
->u
.nfs_i
.writeback
;
462 spin_lock(&nfs_wreq_lock
);
463 list_for_each_prev(p
, head
) {
464 unsigned long pg_idx
;
465 struct nfs_page
*req
= nfs_inode_wb_entry(p
);
467 pg_idx
= page_index(req
->wb_page
);
468 if (pg_idx
< idx_start
)
470 if (pg_idx
> idx_end
)
473 if (!NFS_WBACK_BUSY(req
))
476 spin_unlock(&nfs_wreq_lock
);
477 error
= nfs_wait_on_request(req
);
478 nfs_release_request(req
);
484 spin_unlock(&nfs_wreq_lock
);
489 * nfs_scan_lru_dirty_timeout - Scan LRU list for timed out dirty requests
490 * @server: NFS superblock data
491 * @dst: destination list
493 * Moves a maximum of 'wpages' requests from the NFS dirty page LRU list.
494 * The elements are checked to ensure that they form a contiguous set
495 * of pages, and that they originated from the same file.
498 nfs_scan_lru_dirty_timeout(struct nfs_server
*server
, struct list_head
*dst
)
503 npages
= nfs_scan_lru_timeout(&server
->lru_dirty
, dst
, server
->wpages
);
505 inode
= nfs_list_entry(dst
->next
)->wb_inode
;
506 inode
->u
.nfs_i
.ndirty
-= npages
;
512 * nfs_scan_lru_dirty - Scan LRU list for dirty requests
513 * @server: NFS superblock data
514 * @dst: destination list
516 * Moves a maximum of 'wpages' requests from the NFS dirty page LRU list.
517 * The elements are checked to ensure that they form a contiguous set
518 * of pages, and that they originated from the same file.
521 nfs_scan_lru_dirty(struct nfs_server
*server
, struct list_head
*dst
)
526 npages
= nfs_scan_lru(&server
->lru_dirty
, dst
, server
->wpages
);
528 inode
= nfs_list_entry(dst
->next
)->wb_inode
;
529 inode
->u
.nfs_i
.ndirty
-= npages
;
535 * nfs_scan_dirty - Scan an inode for dirty requests
536 * @inode: NFS inode to scan
537 * @dst: destination list
538 * @idx_start: lower bound of page->index to scan.
539 * @npages: idx_start + npages sets the upper bound to scan.
541 * Moves requests from the inode's dirty page list.
542 * The requests are *not* checked to ensure that they form a contiguous set.
545 nfs_scan_dirty(struct inode
*inode
, struct list_head
*dst
, unsigned long idx_start
, unsigned int npages
)
548 res
= nfs_scan_list(&inode
->u
.nfs_i
.dirty
, dst
, idx_start
, npages
);
549 inode
->u
.nfs_i
.ndirty
-= res
;
550 if ((inode
->u
.nfs_i
.ndirty
== 0) != list_empty(&inode
->u
.nfs_i
.dirty
))
551 printk(KERN_ERR
"NFS: desynchronized value of nfs_i.ndirty.\n");
557 * nfs_scan_lru_commit_timeout - Scan LRU list for timed out commit requests
558 * @server: NFS superblock data
559 * @dst: destination list
561 * Finds the first a timed out request in the NFS commit LRU list and moves it
562 * to the list dst. If such an element is found, we move all other commit
563 * requests that apply to the same inode.
564 * The assumption is that doing everything in a single commit-to-disk is
565 * the cheaper alternative.
568 nfs_scan_lru_commit_timeout(struct nfs_server
*server
, struct list_head
*dst
)
573 npages
= nfs_scan_lru_timeout(&server
->lru_commit
, dst
, 1);
575 inode
= nfs_list_entry(dst
->next
)->wb_inode
;
576 npages
+= nfs_scan_list(&inode
->u
.nfs_i
.commit
, dst
, 0, 0);
577 inode
->u
.nfs_i
.ncommit
-= npages
;
584 * nfs_scan_lru_commit_timeout - Scan LRU list for timed out commit requests
585 * @server: NFS superblock data
586 * @dst: destination list
588 * Finds the first request in the NFS commit LRU list and moves it
589 * to the list dst. If such an element is found, we move all other commit
590 * requests that apply to the same inode.
591 * The assumption is that doing everything in a single commit-to-disk is
592 * the cheaper alternative.
595 nfs_scan_lru_commit(struct nfs_server
*server
, struct list_head
*dst
)
600 npages
= nfs_scan_lru(&server
->lru_commit
, dst
, 1);
602 inode
= nfs_list_entry(dst
->next
)->wb_inode
;
603 npages
+= nfs_scan_list(&inode
->u
.nfs_i
.commit
, dst
, 0, 0);
604 inode
->u
.nfs_i
.ncommit
-= npages
;
610 * nfs_scan_commit - Scan an inode for commit requests
611 * @inode: NFS inode to scan
612 * @dst: destination list
613 * @idx_start: lower bound of page->index to scan.
614 * @npages: idx_start + npages sets the upper bound to scan.
616 * Moves requests from the inode's 'commit' request list.
617 * The requests are *not* checked to ensure that they form a contiguous set.
620 nfs_scan_commit(struct inode
*inode
, struct list_head
*dst
, unsigned long idx_start
, unsigned int npages
)
623 res
= nfs_scan_list(&inode
->u
.nfs_i
.commit
, dst
, idx_start
, npages
);
624 inode
->u
.nfs_i
.ncommit
-= res
;
625 if ((inode
->u
.nfs_i
.ncommit
== 0) != list_empty(&inode
->u
.nfs_i
.commit
))
626 printk(KERN_ERR
"NFS: desynchronized value of nfs_i.ncommit.\n");
633 * Try to update any existing write request, or create one if there is none.
634 * In order to match, the request's credentials must match those of
635 * the calling process.
637 * Note: Should always be called with the Page Lock held!
639 static struct nfs_page
*
640 nfs_update_request(struct file
* file
, struct inode
*inode
, struct page
*page
,
641 unsigned int offset
, unsigned int bytes
)
643 struct nfs_page
*req
, *new = NULL
;
644 unsigned long rqend
, end
;
646 end
= offset
+ bytes
;
649 /* Loop over all inode entries and see if we find
650 * A request for the page we wish to update
652 spin_lock(&nfs_wreq_lock
);
653 req
= _nfs_find_request(inode
, page
);
655 if (!nfs_lock_request_dontget(req
)) {
657 spin_unlock(&nfs_wreq_lock
);
658 error
= nfs_wait_on_request(req
);
659 nfs_release_request(req
);
662 nfs_release_request(new);
663 return ERR_PTR(error
);
667 spin_unlock(&nfs_wreq_lock
);
669 nfs_release_request(new);
674 nfs_lock_request_dontget(new);
675 nfs_inode_add_request(inode
, new);
676 spin_unlock(&nfs_wreq_lock
);
677 nfs_mark_request_dirty(new);
680 spin_unlock(&nfs_wreq_lock
);
682 new = nfs_create_request(nfs_file_cred(file
), inode
, page
, offset
, bytes
);
689 /* If the region is locked, adjust the timeout */
690 if (region_locked(inode
, new))
691 new->wb_timeout
= jiffies
+ NFS_WRITEBACK_LOCKDELAY
;
693 new->wb_timeout
= jiffies
+ NFS_WRITEBACK_DELAY
;
696 /* We have a request for our page.
697 * If the creds don't match, or the
698 * page addresses don't match,
699 * tell the caller to wait on the conflicting
702 rqend
= req
->wb_offset
+ req
->wb_bytes
;
703 if (req
->wb_file
!= file
704 || req
->wb_page
!= page
705 || !nfs_dirty_request(req
)
706 || offset
> rqend
|| end
< req
->wb_offset
) {
707 nfs_unlock_request(req
);
708 return ERR_PTR(-EBUSY
);
711 /* Okay, the request matches. Update the region */
712 if (offset
< req
->wb_offset
) {
713 req
->wb_offset
= offset
;
714 req
->wb_bytes
= rqend
- req
->wb_offset
;
718 req
->wb_bytes
= end
- req
->wb_offset
;
724 * This is the strategy routine for NFS.
725 * It is called by nfs_updatepage whenever the user wrote up to the end
728 * We always try to submit a set of requests in parallel so that the
729 * server's write code can gather writes. This is mainly for the benefit
732 * We never submit more requests than we think the remote can handle.
733 * For UDP sockets, we make sure we don't exceed the congestion window;
734 * for TCP, we limit the number of requests to 8.
736 * NFS_STRATEGY_PAGES gives the minimum number of requests for NFSv2 that
737 * should be sent out in one go. This is for the benefit of NFSv2 servers
738 * that perform write gathering.
740 * FIXME: Different servers may have different sweet spots.
741 * Record the average congestion window in server struct?
743 #define NFS_STRATEGY_PAGES 8
745 nfs_strategy(struct inode
*inode
)
747 unsigned int dirty
, wpages
;
749 dirty
= inode
->u
.nfs_i
.ndirty
;
750 wpages
= NFS_SERVER(inode
)->wpages
;
752 if (NFS_PROTO(inode
)->version
== 2) {
753 if (dirty
>= NFS_STRATEGY_PAGES
* wpages
)
754 nfs_flush_file(inode
, 0, 0, 0);
755 } else if (dirty
>= wpages
)
756 nfs_flush_file(inode
, 0, 0, 0);
758 if (dirty
>= NFS_STRATEGY_PAGES
* wpages
)
759 nfs_flush_file(inode
, 0, 0, 0);
764 nfs_flush_incompatible(struct file
*file
, struct page
*page
)
766 struct rpc_cred
*cred
= nfs_file_cred(file
);
767 struct inode
*inode
= page
->mapping
->host
;
768 struct nfs_page
*req
;
771 * Look for a request corresponding to this page. If there
772 * is one, and it belongs to another file, we flush it out
773 * before we try to copy anything into the page. Do this
774 * due to the lack of an ACCESS-type call in NFSv2.
775 * Also do the same if we find a request from an existing
778 req
= nfs_find_request(inode
,page
);
780 if (req
->wb_file
!= file
|| req
->wb_cred
!= cred
|| req
->wb_page
!= page
)
781 status
= nfs_wb_page(inode
, page
);
782 nfs_release_request(req
);
784 return (status
< 0) ? status
: 0;
788 * Update and possibly write a cached page of an NFS file.
790 * XXX: Keep an eye on generic_file_read to make sure it doesn't do bad
791 * things with a page scheduled for an RPC call (e.g. invalidate it).
794 nfs_updatepage(struct file
*file
, struct page
*page
, unsigned int offset
, unsigned int count
)
796 struct dentry
*dentry
= file
->f_dentry
;
797 struct inode
*inode
= page
->mapping
->host
;
798 struct nfs_page
*req
;
802 dprintk("NFS: nfs_updatepage(%s/%s %d@%Ld)\n",
803 dentry
->d_parent
->d_name
.name
, dentry
->d_name
.name
,
804 count
, (long long)(page_offset(page
) +offset
));
807 * If wsize is smaller than page size, update and write
808 * page synchronously.
810 if (NFS_SERVER(inode
)->wsize
< PAGE_CACHE_SIZE
|| IS_SYNC(inode
)) {
811 status
= nfs_writepage_sync(file
, inode
, page
, offset
, count
);
813 if (offset
== 0 && status
== PAGE_CACHE_SIZE
)
814 SetPageUptodate(page
);
821 * Try to find an NFS request corresponding to this page
823 * If the existing request cannot be updated, we must flush
827 req
= nfs_update_request(file
, inode
, page
, offset
, count
);
828 status
= (IS_ERR(req
)) ? PTR_ERR(req
) : 0;
829 if (status
!= -EBUSY
)
831 /* Request could not be updated. Flush it out and try again */
832 status
= nfs_wb_page(inode
, page
);
833 } while (status
>= 0);
838 end
= ((loff_t
)page
->index
<<PAGE_CACHE_SHIFT
) + (loff_t
)(offset
+ count
);
839 if (inode
->i_size
< end
)
842 /* If we wrote past the end of the page.
843 * Call the strategy routine so it can send out a bunch
846 if (req
->wb_offset
== 0 && req
->wb_bytes
== PAGE_CACHE_SIZE
) {
847 SetPageUptodate(page
);
848 nfs_unlock_request(req
);
851 nfs_unlock_request(req
);
853 dprintk("NFS: nfs_updatepage returns %d (isize %Ld)\n",
854 status
, (long long)inode
->i_size
);
856 ClearPageUptodate(page
);
861 * Set up the argument/result storage required for the RPC call.
864 nfs_write_rpcsetup(struct list_head
*head
, struct nfs_write_data
*data
)
866 struct nfs_page
*req
;
870 /* Set up the RPC argument and reply structs
871 * NB: take care not to mess about with data->commit et al. */
873 pages
= data
->args
.pages
;
875 while (!list_empty(head
)) {
876 struct nfs_page
*req
= nfs_list_entry(head
->next
);
877 nfs_list_remove_request(req
);
878 nfs_list_add_request(req
, &data
->pages
);
879 *pages
++ = req
->wb_page
;
880 count
+= req
->wb_bytes
;
882 req
= nfs_list_entry(data
->pages
.next
);
883 data
->inode
= req
->wb_inode
;
884 data
->cred
= req
->wb_cred
;
885 data
->args
.fh
= NFS_FH(req
->wb_inode
);
886 data
->args
.offset
= page_offset(req
->wb_page
) + req
->wb_offset
;
887 data
->args
.pgbase
= req
->wb_offset
;
888 data
->args
.count
= count
;
889 data
->res
.fattr
= &data
->fattr
;
890 data
->res
.count
= count
;
891 data
->res
.verf
= &data
->verf
;
896 * Create an RPC task for the given write request and kick it.
897 * The page must have been locked by the caller.
899 * It may happen that the page we're passed is not marked dirty.
900 * This is the case if nfs_updatepage detects a conflicting request
901 * that has been written but not committed.
904 nfs_flush_one(struct list_head
*head
, struct inode
*inode
, int how
)
906 struct rpc_clnt
*clnt
= NFS_CLIENT(inode
);
907 struct nfs_write_data
*data
;
908 struct rpc_task
*task
;
909 struct rpc_message msg
;
911 nfsvers
= NFS_PROTO(inode
)->version
,
912 async
= !(how
& FLUSH_SYNC
),
913 stable
= (how
& FLUSH_STABLE
);
917 data
= nfs_writedata_alloc();
922 /* Set the initial flags for the task. */
923 flags
= (async
) ? RPC_TASK_ASYNC
: 0;
925 /* Set up the argument struct */
926 nfs_write_rpcsetup(head
, data
);
928 data
->args
.stable
= NFS_FILE_SYNC
;
930 if (!inode
->u
.nfs_i
.ncommit
)
931 data
->args
.stable
= NFS_FILE_SYNC
;
933 data
->args
.stable
= NFS_DATA_SYNC
;
935 data
->args
.stable
= NFS_UNSTABLE
;
937 /* Finalize the task. */
938 rpc_init_task(task
, clnt
, nfs_writeback_done
, flags
);
939 task
->tk_calldata
= data
;
940 /* Release requests */
941 task
->tk_release
= nfs_writedata_release
;
944 msg
.rpc_proc
= (nfsvers
== 3) ? NFS3PROC_WRITE
: NFSPROC_WRITE
;
946 msg
.rpc_proc
= NFSPROC_WRITE
;
948 msg
.rpc_argp
= &data
->args
;
949 msg
.rpc_resp
= &data
->res
;
950 msg
.rpc_cred
= data
->cred
;
952 dprintk("NFS: %4d initiated write call (req %x/%Ld count %u)\n",
955 (long long)NFS_FILEID(inode
),
958 rpc_clnt_sigmask(clnt
, &oldset
);
959 rpc_call_setup(task
, &msg
, 0);
963 rpc_clnt_sigunmask(clnt
, &oldset
);
966 while (!list_empty(head
)) {
967 struct nfs_page
*req
= nfs_list_entry(head
->next
);
968 nfs_list_remove_request(req
);
969 nfs_mark_request_dirty(req
);
970 nfs_unlock_request(req
);
976 nfs_flush_list(struct list_head
*head
, int wpages
, int how
)
978 LIST_HEAD(one_request
);
979 struct nfs_page
*req
;
981 unsigned int pages
= 0;
983 while (!list_empty(head
)) {
984 pages
+= nfs_coalesce_requests(head
, &one_request
, wpages
);
985 req
= nfs_list_entry(one_request
.next
);
986 error
= nfs_flush_one(&one_request
, req
->wb_inode
, how
);
993 while (!list_empty(head
)) {
994 req
= nfs_list_entry(head
->next
);
995 nfs_list_remove_request(req
);
996 nfs_mark_request_dirty(req
);
997 nfs_unlock_request(req
);
1004 * This function is called when the WRITE call is complete.
1007 nfs_writeback_done(struct rpc_task
*task
)
1009 struct nfs_write_data
*data
= (struct nfs_write_data
*) task
->tk_calldata
;
1010 struct nfs_writeargs
*argp
= &data
->args
;
1011 struct nfs_writeres
*resp
= &data
->res
;
1012 struct inode
*inode
= data
->inode
;
1013 struct nfs_page
*req
;
1016 dprintk("NFS: %4d nfs_writeback_done (status %d)\n",
1017 task
->tk_pid
, task
->tk_status
);
1019 if (nfs_async_handle_jukebox(task
))
1022 /* We can't handle that yet but we check for it nevertheless */
1023 if (resp
->count
< argp
->count
&& task
->tk_status
>= 0) {
1024 static unsigned long complain
;
1025 if (time_before(complain
, jiffies
)) {
1027 "NFS: Server wrote less than requested.\n");
1028 complain
= jiffies
+ 300 * HZ
;
1030 /* Can't do anything about it right now except throw
1032 task
->tk_status
= -EIO
;
1034 #ifdef CONFIG_NFS_V3
1035 if (resp
->verf
->committed
< argp
->stable
&& task
->tk_status
>= 0) {
1036 /* We tried a write call, but the server did not
1037 * commit data to stable storage even though we
1039 * Note: There is a known bug in Tru64 < 5.0 in which
1040 * the server reports NFS_DATA_SYNC, but performs
1041 * NFS_FILE_SYNC. We therefore implement this checking
1042 * as a dprintk() in order to avoid filling syslog.
1044 static unsigned long complain
;
1046 if (time_before(complain
, jiffies
)) {
1047 dprintk("NFS: faulty NFSv3 server %s:"
1048 " (committed = %d) != (stable = %d)\n",
1049 NFS_SERVER(inode
)->hostname
,
1050 resp
->verf
->committed
, argp
->stable
);
1051 complain
= jiffies
+ 300 * HZ
;
1057 * Update attributes as result of writeback.
1058 * FIXME: There is an inherent race with invalidate_inode_pages and
1059 * writebacks since the page->count is kept > 1 for as long
1060 * as the page has a write request pending.
1062 nfs_write_attributes(inode
, resp
->fattr
);
1063 while (!list_empty(&data
->pages
)) {
1064 req
= nfs_list_entry(data
->pages
.next
);
1065 nfs_list_remove_request(req
);
1066 page
= req
->wb_page
;
1068 dprintk("NFS: write (%x/%Ld %d@%Ld)",
1069 req
->wb_inode
->i_dev
,
1070 (long long)NFS_FILEID(req
->wb_inode
),
1072 (long long)(page_offset(page
) + req
->wb_offset
));
1074 if (task
->tk_status
< 0) {
1075 ClearPageUptodate(page
);
1078 req
->wb_file
->f_error
= task
->tk_status
;
1079 nfs_inode_remove_request(req
);
1080 dprintk(", error = %d\n", task
->tk_status
);
1084 #ifdef CONFIG_NFS_V3
1085 if (argp
->stable
!= NFS_UNSTABLE
|| resp
->verf
->committed
== NFS_FILE_SYNC
) {
1086 nfs_inode_remove_request(req
);
1090 memcpy(&req
->wb_verf
, resp
->verf
, sizeof(req
->wb_verf
));
1091 req
->wb_timeout
= jiffies
+ NFS_COMMIT_DELAY
;
1092 nfs_mark_request_commit(req
);
1093 dprintk(" marked for commit\n");
1095 nfs_inode_remove_request(req
);
1098 nfs_unlock_request(req
);
1103 #ifdef CONFIG_NFS_V3
1105 * Set up the argument/result storage required for the RPC call.
1108 nfs_commit_rpcsetup(struct list_head
*head
, struct nfs_write_data
*data
)
1110 struct nfs_page
*first
, *last
;
1111 struct inode
*inode
;
1112 loff_t start
, end
, len
;
1114 /* Set up the RPC argument and reply structs
1115 * NB: take care not to mess about with data->commit et al. */
1117 list_splice(head
, &data
->pages
);
1118 INIT_LIST_HEAD(head
);
1119 first
= nfs_list_entry(data
->pages
.next
);
1120 last
= nfs_list_entry(data
->pages
.prev
);
1121 inode
= first
->wb_inode
;
1124 * Determine the offset range of requests in the COMMIT call.
1125 * We rely on the fact that data->pages is an ordered list...
1127 start
= page_offset(first
->wb_page
) + first
->wb_offset
;
1128 end
= page_offset(last
->wb_page
) + (last
->wb_offset
+ last
->wb_bytes
);
1130 /* If 'len' is not a 32-bit quantity, pass '0' in the COMMIT call */
1131 if (end
>= inode
->i_size
|| len
< 0 || len
> (~((u32
)0) >> 1))
1134 data
->inode
= inode
;
1135 data
->cred
= first
->wb_cred
;
1136 data
->args
.fh
= NFS_FH(inode
);
1137 data
->args
.offset
= start
;
1138 data
->res
.count
= data
->args
.count
= (u32
)len
;
1139 data
->res
.fattr
= &data
->fattr
;
1140 data
->res
.verf
= &data
->verf
;
1144 * Commit dirty pages
1147 nfs_commit_list(struct list_head
*head
, int how
)
1149 struct rpc_message msg
;
1150 struct rpc_clnt
*clnt
;
1151 struct nfs_write_data
*data
;
1152 struct rpc_task
*task
;
1153 struct nfs_page
*req
;
1155 async
= !(how
& FLUSH_SYNC
);
1158 data
= nfs_writedata_alloc();
1164 flags
= (async
) ? RPC_TASK_ASYNC
: 0;
1166 /* Set up the argument struct */
1167 nfs_commit_rpcsetup(head
, data
);
1168 req
= nfs_list_entry(data
->pages
.next
);
1169 clnt
= NFS_CLIENT(req
->wb_inode
);
1171 rpc_init_task(task
, clnt
, nfs_commit_done
, flags
);
1172 task
->tk_calldata
= data
;
1173 /* Release requests */
1174 task
->tk_release
= nfs_writedata_release
;
1176 msg
.rpc_proc
= NFS3PROC_COMMIT
;
1177 msg
.rpc_argp
= &data
->args
;
1178 msg
.rpc_resp
= &data
->res
;
1179 msg
.rpc_cred
= data
->cred
;
1181 dprintk("NFS: %4d initiated commit call\n", task
->tk_pid
);
1182 rpc_clnt_sigmask(clnt
, &oldset
);
1183 rpc_call_setup(task
, &msg
, 0);
1187 rpc_clnt_sigunmask(clnt
, &oldset
);
1190 while (!list_empty(head
)) {
1191 req
= nfs_list_entry(head
->next
);
1192 nfs_list_remove_request(req
);
1193 nfs_mark_request_commit(req
);
1194 nfs_unlock_request(req
);
1200 * COMMIT call returned
1203 nfs_commit_done(struct rpc_task
*task
)
1205 struct nfs_write_data
*data
= (struct nfs_write_data
*)task
->tk_calldata
;
1206 struct nfs_writeres
*resp
= &data
->res
;
1207 struct nfs_page
*req
;
1208 struct inode
*inode
= data
->inode
;
1210 dprintk("NFS: %4d nfs_commit_done (status %d)\n",
1211 task
->tk_pid
, task
->tk_status
);
1213 if (nfs_async_handle_jukebox(task
))
1216 nfs_write_attributes(inode
, resp
->fattr
);
1217 while (!list_empty(&data
->pages
)) {
1218 req
= nfs_list_entry(data
->pages
.next
);
1219 nfs_list_remove_request(req
);
1221 dprintk("NFS: commit (%x/%Ld %d@%Ld)",
1222 req
->wb_inode
->i_dev
,
1223 (long long)NFS_FILEID(req
->wb_inode
),
1225 (long long)(page_offset(req
->wb_page
) + req
->wb_offset
));
1226 if (task
->tk_status
< 0) {
1228 req
->wb_file
->f_error
= task
->tk_status
;
1229 nfs_inode_remove_request(req
);
1230 dprintk(", error = %d\n", task
->tk_status
);
1234 /* Okay, COMMIT succeeded, apparently. Check the verifier
1235 * returned by the server against all stored verfs. */
1236 if (!memcmp(req
->wb_verf
.verifier
, data
->verf
.verifier
, sizeof(data
->verf
.verifier
))) {
1237 /* We have a match */
1238 nfs_inode_remove_request(req
);
1242 /* We have a mismatch. Write the page again */
1243 dprintk(" mismatch\n");
1244 nfs_mark_request_dirty(req
);
1246 nfs_unlock_request(req
);
1251 int nfs_flush_file(struct inode
*inode
, unsigned long idx_start
,
1252 unsigned int npages
, int how
)
1258 spin_lock(&nfs_wreq_lock
);
1259 res
= nfs_scan_dirty(inode
, &head
, idx_start
, npages
);
1260 spin_unlock(&nfs_wreq_lock
);
1262 error
= nfs_flush_list(&head
, NFS_SERVER(inode
)->wpages
, how
);
#ifdef CONFIG_NFS_V3
/*
 * Send a COMMIT for all of the inode's pending commit requests.
 * Returns the number of requests committed, or a negative errno.
 */
int nfs_commit_file(struct inode *inode, int how)
{
	LIST_HEAD(head);
	int			res,
				error = 0;

	spin_lock(&nfs_wreq_lock);
	res = nfs_scan_commit(inode, &head, 0, 0);
	spin_unlock(&nfs_wreq_lock);
	if (res)
		error = nfs_commit_list(&head, how);
	if (error < 0)
		return error;
	return res;
}
#endif
1286 int nfs_sync_file(struct inode
*inode
, unsigned long idx_start
,
1287 unsigned int npages
, int how
)
1292 wait
= how
& FLUSH_WAIT
;
1298 error
= nfs_wait_on_requests(inode
, idx_start
, npages
);
1300 error
= nfs_flush_file(inode
, idx_start
, npages
, how
);
1301 #ifdef CONFIG_NFS_V3
1303 error
= nfs_commit_file(inode
, how
);
1305 } while (error
> 0);
1309 int nfs_init_writepagecache(void)
1311 nfs_wdata_cachep
= kmem_cache_create("nfs_write_data",
1312 sizeof(struct nfs_write_data
),
1313 0, SLAB_HWCACHE_ALIGN
,
1315 if (nfs_wdata_cachep
== NULL
)
1321 void nfs_destroy_writepagecache(void)
1323 if (kmem_cache_destroy(nfs_wdata_cachep
))
1324 printk(KERN_INFO
"nfs_write_data: not all structures were freed\n");