4 * Writing file data over NFS.
6 * We do it like this: When a (user) process wishes to write data to an
7 * NFS file, a write request is allocated that contains the RPC task data
8 * plus some info on the page to be written, and added to the inode's
9 * write chain. If the process writes past the end of the page, an async
10 * RPC call to write the page is scheduled immediately; otherwise, the call
11 * is delayed for a few seconds.
13 * Just like readahead, no async I/O is performed if wsize < PAGE_SIZE.
15 * Write requests are kept on the inode's writeback list. Each entry in
16 * that list references the page (portion) to be written. When the
17 * cache timeout has expired, the RPC task is woken up, and tries to
18 * lock the page. As soon as it manages to do so, the request is moved
19 * from the writeback list to the writelock list.
21 * Note: we must make sure never to confuse the inode passed in the
22 * write_page request with the one in page->inode. As far as I understand
23 * it, these are different when doing a swap-out.
25 * To understand everything that goes on here and in the NFS read code,
26 * one should be aware that a page is locked in exactly one of the following cases:
29 * - A write request is in progress.
30 * - A user process is in generic_file_write/nfs_update_page
31 * - A user process is in generic_file_read
33 * Also note that because of the way pages are invalidated in
34 * nfs_revalidate_inode, the following assertions hold:
36 * - If a page is dirty, there will be no read requests (a page will
37 * not be re-read unless invalidated by nfs_revalidate_inode).
38 * - If the page is not uptodate, there will be no pending write
39 * requests, and no process will be in nfs_update_page.
41 * FIXME: Interaction with the vmscan routines is not optimal yet.
42 * Either vmscan must be made nfs-savvy, or we need a different page
43 * reclaim concept that supports something like FS-independent
44 * buffer_heads with a b_ops-> field.
46 * Copyright (C) 1996, 1997, Olaf Kirch <okir@monad.swb.de>
49 #include <linux/config.h>
50 #include <linux/types.h>
51 #include <linux/malloc.h>
52 #include <linux/swap.h>
53 #include <linux/pagemap.h>
54 #include <linux/file.h>
56 #include <linux/sunrpc/clnt.h>
57 #include <linux/nfs_fs.h>
58 #include <linux/nfs_mount.h>
59 #include <linux/nfs_flushd.h>
60 #include <linux/nfs_page.h>
61 #include <asm/uaccess.h>
62 #include <linux/smp_lock.h>
64 #define NFS_PARANOIA 1
65 #define NFSDBG_FACILITY NFSDBG_PAGECACHE
70 spinlock_t nfs_wreq_lock
= SPIN_LOCK_UNLOCKED
;
71 static atomic_t nfs_nr_requests
= ATOMIC_INIT(0);
76 * This is the struct where the WRITE/COMMIT arguments go.
78 struct nfs_write_data
{
81 struct rpc_cred
*cred
;
82 struct nfs_writeargs args
; /* argument struct */
83 struct nfs_writeres res
; /* result struct */
84 struct nfs_fattr fattr
;
85 struct nfs_writeverf verf
;
86 struct list_head pages
; /* Coalesced requests we wish to flush */
90 * Local function declarations
92 static struct nfs_page
* nfs_update_request(struct file
*, struct inode
*,
94 unsigned int, unsigned int);
95 static void nfs_strategy(struct inode
*inode
);
96 static void nfs_writeback_done(struct rpc_task
*);
98 static void nfs_commit_done(struct rpc_task
*);
101 /* Hack for future NFS swap support */
103 # define IS_SWAPFILE(inode) (0)
106 static kmem_cache_t
*nfs_page_cachep
;
107 static kmem_cache_t
*nfs_wdata_cachep
;
109 static __inline__
struct nfs_page
*nfs_page_alloc(void)
112 p
= kmem_cache_alloc(nfs_page_cachep
, SLAB_KERNEL
);
114 memset(p
, 0, sizeof(*p
));
115 INIT_LIST_HEAD(&p
->wb_hash
);
116 INIT_LIST_HEAD(&p
->wb_list
);
117 init_waitqueue_head(&p
->wb_wait
);
122 static __inline__
void nfs_page_free(struct nfs_page
*p
)
124 kmem_cache_free(nfs_page_cachep
, p
);
127 static __inline__
struct nfs_write_data
*nfs_writedata_alloc(void)
129 struct nfs_write_data
*p
;
130 p
= kmem_cache_alloc(nfs_wdata_cachep
, SLAB_NFS
);
132 memset(p
, 0, sizeof(*p
));
133 INIT_LIST_HEAD(&p
->pages
);
138 static __inline__
void nfs_writedata_free(struct nfs_write_data
*p
)
140 kmem_cache_free(nfs_wdata_cachep
, p
);
143 static void nfs_writedata_release(struct rpc_task
*task
)
145 struct nfs_write_data
*wdata
= (struct nfs_write_data
*)task
->tk_calldata
;
146 nfs_writedata_free(wdata
);
150 * This function will be used to simulate weak cache consistency
151 * under NFSv2 when the NFSv3 attribute patch is included.
152 * For the moment, we just call nfs_refresh_inode().
154 static __inline__
int
155 nfs_write_attributes(struct inode
*inode
, struct nfs_fattr
*fattr
)
157 if ((fattr
->valid
& NFS_ATTR_FATTR
) && !(fattr
->valid
& NFS_ATTR_WCC
)) {
158 fattr
->pre_size
= NFS_CACHE_ISIZE(inode
);
159 fattr
->pre_mtime
= NFS_CACHE_MTIME(inode
);
160 fattr
->pre_ctime
= NFS_CACHE_CTIME(inode
);
161 fattr
->valid
|= NFS_ATTR_WCC
;
163 return nfs_refresh_inode(inode
, fattr
);
167 * Write a page synchronously.
168 * Offset is the data offset within the page.
171 nfs_writepage_sync(struct file
*file
, struct inode
*inode
, struct page
*page
,
172 unsigned int offset
, unsigned int count
)
174 struct dentry
*dentry
= file
->f_dentry
;
175 struct rpc_cred
*cred
= nfs_file_cred(file
);
177 unsigned int wsize
= NFS_SERVER(inode
)->wsize
;
178 int result
, refresh
= 0, written
= 0, flags
;
180 struct nfs_fattr fattr
;
181 struct nfs_writeverf verf
;
184 dprintk("NFS: nfs_writepage_sync(%s/%s %d@%Ld)\n",
185 dentry
->d_parent
->d_name
.name
, dentry
->d_name
.name
,
186 count
, (long long)(page_offset(page
) + offset
));
188 buffer
= kmap(page
) + offset
;
189 base
= page_offset(page
) + offset
;
191 flags
= ((IS_SWAPFILE(inode
)) ? NFS_RW_SWAP
: 0) | NFS_RW_SYNC
;
194 if (count
< wsize
&& !IS_SWAPFILE(inode
))
197 result
= NFS_PROTO(inode
)->write(inode
, cred
, &fattr
, flags
,
198 base
, wsize
, buffer
, &verf
);
199 nfs_write_attributes(inode
, &fattr
);
202 /* Must mark the page invalid after I/O error */
203 ClearPageUptodate(page
);
207 printk("NFS: short write, wsize=%u, result=%d\n",
215 * If we've extended the file, update the inode
216 * now so we don't invalidate the cache.
218 if (base
> inode
->i_size
)
219 inode
->i_size
= base
;
223 ClearPageError(page
);
229 return written
? written
: result
;
233 nfs_writepage_async(struct file
*file
, struct inode
*inode
, struct page
*page
,
234 unsigned int offset
, unsigned int count
)
236 struct nfs_page
*req
;
239 req
= nfs_update_request(file
, inode
, page
, offset
, count
);
240 status
= (IS_ERR(req
)) ? PTR_ERR(req
) : 0;
243 nfs_release_request(req
);
250 * Write an mmapped page to the server.
253 nfs_writepage(struct file
*file
, struct page
*page
)
256 unsigned long end_index
;
257 unsigned offset
= PAGE_CACHE_SIZE
;
261 struct address_space
*mapping
= page
->mapping
;
264 inode
= (struct inode
*)mapping
->host
;
266 inode
= file
->f_dentry
->d_inode
;
269 end_index
= inode
->i_size
>> PAGE_CACHE_SHIFT
;
271 /* Ensure we've flushed out any previous writes */
272 nfs_wb_page(inode
,page
);
275 if (page
->index
< end_index
)
277 /* things got complicated... */
278 offset
= inode
->i_size
& (PAGE_CACHE_SIZE
-1);
279 /* OK, are we completely out? */
280 if (page
->index
>= end_index
+1 || !offset
)
283 if (!PageError(page
) && NFS_SERVER(inode
)->rsize
>= PAGE_CACHE_SIZE
) {
284 err
= nfs_writepage_async(file
, inode
, page
, 0, offset
);
288 err
= nfs_writepage_sync(file
, inode
, page
, 0, offset
);
297 * Check whether the file range we want to write to is locked by us, i.e. covered by a POSIX write lock owned by current->files.
301 region_locked(struct inode
*inode
, struct nfs_page
*req
)
303 struct file_lock
*fl
;
304 loff_t rqstart
, rqend
;
306 /* Don't optimize writes if we don't use NLM */
307 if (NFS_SERVER(inode
)->flags
& NFS_MOUNT_NONLM
)
310 rqstart
= page_offset(req
->wb_page
) + req
->wb_offset
;
311 rqend
= rqstart
+ req
->wb_bytes
;
312 for (fl
= inode
->i_flock
; fl
; fl
= fl
->fl_next
) {
313 if (fl
->fl_owner
== current
->files
&& (fl
->fl_flags
& FL_POSIX
)
314 && fl
->fl_type
== F_WRLCK
315 && fl
->fl_start
<= rqstart
&& rqend
<= fl
->fl_end
) {
324 * Insert a write request into an inode
327 nfs_inode_add_request(struct inode
*inode
, struct nfs_page
*req
)
329 if (!list_empty(&req
->wb_hash
))
331 if (!NFS_WBACK_BUSY(req
))
332 printk(KERN_ERR
"NFS: unlocked request attempted hashed!\n");
333 if (list_empty(&inode
->u
.nfs_i
.writeback
))
334 atomic_inc(&inode
->i_count
);
335 inode
->u
.nfs_i
.npages
++;
336 list_add(&req
->wb_hash
, &inode
->u
.nfs_i
.writeback
);
341 * Remove a write request from an inode
344 nfs_inode_remove_request(struct nfs_page
*req
)
347 spin_lock(&nfs_wreq_lock
);
348 if (list_empty(&req
->wb_hash
)) {
349 spin_unlock(&nfs_wreq_lock
);
352 if (!NFS_WBACK_BUSY(req
))
353 printk(KERN_ERR
"NFS: unlocked request attempted unhashed!\n");
354 inode
= req
->wb_inode
;
355 list_del(&req
->wb_hash
);
356 INIT_LIST_HEAD(&req
->wb_hash
);
357 inode
->u
.nfs_i
.npages
--;
358 if ((inode
->u
.nfs_i
.npages
== 0) != list_empty(&inode
->u
.nfs_i
.writeback
))
359 printk(KERN_ERR
"NFS: desynchronized value of nfs_i.npages.\n");
360 if (list_empty(&inode
->u
.nfs_i
.writeback
))
362 if (!nfs_have_writebacks(inode
) && !nfs_have_read(inode
))
363 inode_remove_flushd(inode
);
364 spin_unlock(&nfs_wreq_lock
);
365 nfs_release_request(req
);
371 static inline struct nfs_page
*
372 _nfs_find_request(struct inode
*inode
, struct page
*page
)
374 struct list_head
*head
, *next
;
376 head
= &inode
->u
.nfs_i
.writeback
;
378 while (next
!= head
) {
379 struct nfs_page
*req
= nfs_inode_wb_entry(next
);
381 if (page_index(req
->wb_page
) != page_index(page
))
389 static struct nfs_page
*
390 nfs_find_request(struct inode
*inode
, struct page
*page
)
392 struct nfs_page
*req
;
394 spin_lock(&nfs_wreq_lock
);
395 req
= _nfs_find_request(inode
, page
);
396 spin_unlock(&nfs_wreq_lock
);
401 * Insert a write request into a sorted list
403 void nfs_list_add_request(struct nfs_page
*req
, struct list_head
*head
)
405 struct list_head
*prev
;
407 if (!list_empty(&req
->wb_list
)) {
408 printk(KERN_ERR
"NFS: Add to list failed!\n");
411 if (!NFS_WBACK_BUSY(req
))
412 printk(KERN_ERR
"NFS: unlocked request attempted added to list!\n");
414 while (prev
!= head
) {
415 struct nfs_page
*p
= nfs_list_entry(prev
);
416 if (page_index(p
->wb_page
) < page_index(req
->wb_page
))
420 list_add(&req
->wb_list
, prev
);
421 req
->wb_list_head
= head
;
425 * Remove a write request from the list it is currently on
427 void nfs_list_remove_request(struct nfs_page
*req
)
429 if (list_empty(&req
->wb_list
))
431 if (!NFS_WBACK_BUSY(req
))
432 printk(KERN_ERR
"NFS: unlocked request attempted removed from list!\n");
433 list_del(&req
->wb_list
);
434 INIT_LIST_HEAD(&req
->wb_list
);
435 req
->wb_list_head
= NULL
;
439 * Add a request to the inode's dirty list.
442 nfs_mark_request_dirty(struct nfs_page
*req
)
444 struct inode
*inode
= req
->wb_inode
;
446 spin_lock(&nfs_wreq_lock
);
447 if (list_empty(&req
->wb_list
)) {
448 nfs_list_add_request(req
, &inode
->u
.nfs_i
.dirty
);
449 inode
->u
.nfs_i
.ndirty
++;
451 spin_unlock(&nfs_wreq_lock
);
453 * NB: the call to inode_schedule_scan() must lie outside the
454 * spinlock since it can run flushd().
456 inode_schedule_scan(inode
, req
->wb_timeout
);
460 * Check if a request is dirty
463 nfs_dirty_request(struct nfs_page
*req
)
465 struct inode
*inode
= req
->wb_inode
;
466 return !list_empty(&req
->wb_list
) && req
->wb_list_head
== &inode
->u
.nfs_i
.dirty
;
471 * Add a request to the inode's commit list.
474 nfs_mark_request_commit(struct nfs_page
*req
)
476 struct inode
*inode
= req
->wb_inode
;
478 spin_lock(&nfs_wreq_lock
);
479 if (list_empty(&req
->wb_list
)) {
480 nfs_list_add_request(req
, &inode
->u
.nfs_i
.commit
);
481 inode
->u
.nfs_i
.ncommit
++;
483 spin_unlock(&nfs_wreq_lock
);
485 * NB: the call to inode_schedule_scan() must lie outside the
486 * spinlock since it can run flushd().
488 inode_schedule_scan(inode
, req
->wb_timeout
);
493 * Create a write request.
494 * Page must be locked by the caller. This makes sure we never create
495 * two different requests for the same page, and avoids possible deadlock
496 * when we reach the hard limit on the number of dirty pages.
497 * It should be safe to sleep here.
499 struct nfs_page
*nfs_create_request(struct file
*file
, struct inode
*inode
,
501 unsigned int offset
, unsigned int count
)
503 struct nfs_reqlist
*cache
= NFS_REQUESTLIST(inode
);
504 struct nfs_page
*req
= NULL
;
507 /* Deal with hard/soft limits.
510 /* If we're over the global soft limit, wake up all requests */
511 if (atomic_read(&nfs_nr_requests
) >= MAX_REQUEST_SOFT
) {
512 dprintk("NFS: hit soft limit (%d requests)\n",
513 atomic_read(&nfs_nr_requests
));
515 nfs_reqlist_init(NFS_SERVER(inode
));
519 /* If we haven't reached the local hard limit yet,
520 * try to allocate the request struct */
521 if (atomic_read(&cache
->nr_requests
) < MAX_REQUEST_HARD
) {
522 req
= nfs_page_alloc();
527 /* We're over the hard limit. Wait for better times */
528 dprintk("NFS: create_request sleeping (total %d pid %d)\n",
529 atomic_read(&cache
->nr_requests
), current
->pid
);
532 if (NFS_SERVER(inode
)->flags
& NFS_MOUNT_INTR
) {
533 interruptible_sleep_on_timeout(&cache
->request_wait
,
538 sleep_on_timeout(&cache
->request_wait
, timeout
);
540 dprintk("NFS: create_request waking up (tot %d pid %d)\n",
541 atomic_read(&cache
->nr_requests
), current
->pid
);
546 /* Initialize the request struct. Initially, we assume a
547 * long write-back delay. This will be adjusted in
548 * nfs_update_request below if the region is not locked. */
550 page_cache_get(page
);
551 req
->wb_offset
= offset
;
552 req
->wb_bytes
= count
;
555 /* If we have a struct file, use its cached credentials
556 * else cache the current process' credentials. */
559 req
->wb_cred
= nfs_file_cred(file
);
561 req
->wb_cred
= rpcauth_lookupcred(NFS_CLIENT(inode
)->cl_auth
, 0);
562 req
->wb_inode
= inode
;
565 /* register request's existence */
566 atomic_inc(&cache
->nr_requests
);
567 atomic_inc(&nfs_nr_requests
);
573 * Release all resources associated with a write request after it
574 * has been committed to stable storage
576 * Note: Takes nfs_wreq_lock itself, so it must NOT be called with that spinlock held!
579 nfs_release_request(struct nfs_page
*req
)
581 struct inode
*inode
= req
->wb_inode
;
582 struct nfs_reqlist
*cache
= NFS_REQUESTLIST(inode
);
583 struct page
*page
= req
->wb_page
;
585 spin_lock(&nfs_wreq_lock
);
586 if (--req
->wb_count
) {
587 spin_unlock(&nfs_wreq_lock
);
590 spin_unlock(&nfs_wreq_lock
);
592 if (!list_empty(&req
->wb_list
)) {
593 printk(KERN_ERR
"NFS: Request released while still on a list!\n");
594 nfs_list_remove_request(req
);
596 if (!list_empty(&req
->wb_hash
)) {
597 printk(KERN_ERR
"NFS: Request released while still hashed!\n");
598 nfs_inode_remove_request(req
);
600 if (NFS_WBACK_BUSY(req
))
601 printk(KERN_ERR
"NFS: Request released while still locked!\n");
603 /* Release struct file or cached credential */
607 rpcauth_releasecred(NFS_CLIENT(inode
)->cl_auth
, req
->wb_cred
);
608 page_cache_release(page
);
610 /* wake up anyone waiting to allocate a request */
611 atomic_dec(&cache
->nr_requests
);
612 atomic_dec(&nfs_nr_requests
);
613 wake_up(&cache
->request_wait
);
615 if (atomic_read(&cache
->nr_requests
) < 0)
617 if (atomic_read(&nfs_nr_requests
) < 0)
623 * Wait for a request to complete.
625 * Interruptible by signals only if mounted with intr flag.
628 nfs_wait_on_request(struct nfs_page
*req
)
630 struct inode
*inode
= req
->wb_inode
;
631 struct rpc_clnt
*clnt
= NFS_CLIENT(inode
);
633 if (!NFS_WBACK_BUSY(req
))
635 return nfs_wait_event(clnt
, req
->wb_wait
, !NFS_WBACK_BUSY(req
));
639 * Wait for all busy requests on an inode's writeback list that match the given file (if any) and page range to complete.
641 * Interruptible by signals only if mounted with intr flag.
644 nfs_wait_on_requests(struct inode
*inode
, struct file
*file
, unsigned long idx_start
, unsigned int npages
)
646 struct list_head
*p
, *head
;
647 unsigned long idx_end
;
648 unsigned int res
= 0;
654 idx_end
= idx_start
+ npages
- 1;
656 spin_lock(&nfs_wreq_lock
);
657 head
= &inode
->u
.nfs_i
.writeback
;
660 unsigned long pg_idx
;
661 struct nfs_page
*req
= nfs_inode_wb_entry(p
);
665 if (file
&& req
->wb_file
!= file
)
668 pg_idx
= page_index(req
->wb_page
);
669 if (pg_idx
< idx_start
|| pg_idx
> idx_end
)
672 if (!NFS_WBACK_BUSY(req
))
675 spin_unlock(&nfs_wreq_lock
);
676 error
= nfs_wait_on_request(req
);
677 nfs_release_request(req
);
680 spin_lock(&nfs_wreq_lock
);
684 spin_unlock(&nfs_wreq_lock
);
689 * Scan a request list for entries whose write-back timeout has expired and
690 * move those that can be locked onto the dst list.
692 int nfs_scan_list_timeout(struct list_head
*head
, struct list_head
*dst
, struct inode
*inode
)
695 struct nfs_page
*req
;
700 req
= nfs_list_entry(p
);
702 if (time_after(req
->wb_timeout
, jiffies
)) {
703 if (time_after(NFS_NEXTSCAN(inode
), req
->wb_timeout
))
704 NFS_NEXTSCAN(inode
) = req
->wb_timeout
;
707 if (!nfs_lock_request(req
))
709 nfs_list_remove_request(req
);
710 nfs_list_add_request(req
, dst
);
717 nfs_scan_dirty_timeout(struct inode
*inode
, struct list_head
*dst
)
720 spin_lock(&nfs_wreq_lock
);
721 pages
= nfs_scan_list_timeout(&inode
->u
.nfs_i
.dirty
, dst
, inode
);
722 inode
->u
.nfs_i
.ndirty
-= pages
;
723 if ((inode
->u
.nfs_i
.ndirty
== 0) != list_empty(&inode
->u
.nfs_i
.dirty
))
724 printk(KERN_ERR
"NFS: desynchronized value of nfs_i.ndirty.\n");
725 spin_unlock(&nfs_wreq_lock
);
731 nfs_scan_commit_timeout(struct inode
*inode
, struct list_head
*dst
)
734 spin_lock(&nfs_wreq_lock
);
735 pages
= nfs_scan_list_timeout(&inode
->u
.nfs_i
.commit
, dst
, inode
);
736 inode
->u
.nfs_i
.ncommit
-= pages
;
737 if ((inode
->u
.nfs_i
.ncommit
== 0) != list_empty(&inode
->u
.nfs_i
.commit
))
738 printk(KERN_ERR
"NFS: desynchronized value of nfs_i.ncommit.\n");
739 spin_unlock(&nfs_wreq_lock
);
744 int nfs_scan_list(struct list_head
*src
, struct list_head
*dst
, struct file
*file
, unsigned long idx_start
, unsigned int npages
)
747 struct nfs_page
*req
;
748 unsigned long idx_end
;
755 idx_end
= idx_start
+ npages
- 1;
758 unsigned long pg_idx
;
760 req
= nfs_list_entry(p
);
763 if (file
&& req
->wb_file
!= file
)
766 pg_idx
= page_index(req
->wb_page
);
767 if (pg_idx
< idx_start
|| pg_idx
> idx_end
)
770 if (!nfs_lock_request(req
))
772 nfs_list_remove_request(req
);
773 nfs_list_add_request(req
, dst
);
780 nfs_scan_dirty(struct inode
*inode
, struct list_head
*dst
, struct file
*file
, unsigned long idx_start
, unsigned int npages
)
783 spin_lock(&nfs_wreq_lock
);
784 res
= nfs_scan_list(&inode
->u
.nfs_i
.dirty
, dst
, file
, idx_start
, npages
);
785 inode
->u
.nfs_i
.ndirty
-= res
;
786 if ((inode
->u
.nfs_i
.ndirty
== 0) != list_empty(&inode
->u
.nfs_i
.dirty
))
787 printk(KERN_ERR
"NFS: desynchronized value of nfs_i.ndirty.\n");
788 spin_unlock(&nfs_wreq_lock
);
794 nfs_scan_commit(struct inode
*inode
, struct list_head
*dst
, struct file
*file
, unsigned long idx_start
, unsigned int npages
)
797 spin_lock(&nfs_wreq_lock
);
798 res
= nfs_scan_list(&inode
->u
.nfs_i
.commit
, dst
, file
, idx_start
, npages
);
799 inode
->u
.nfs_i
.ncommit
-= res
;
800 if ((inode
->u
.nfs_i
.ncommit
== 0) != list_empty(&inode
->u
.nfs_i
.commit
))
801 printk(KERN_ERR
"NFS: desynchronized value of nfs_i.ncommit.\n");
802 spin_unlock(&nfs_wreq_lock
);
808 int nfs_coalesce_requests(struct list_head
*src
, struct list_head
*dst
, unsigned int maxpages
)
810 struct nfs_page
*req
= NULL
;
811 unsigned int pages
= 0;
813 while (!list_empty(src
)) {
814 struct nfs_page
*prev
= req
;
816 req
= nfs_list_entry(src
->next
);
818 if (req
->wb_file
!= prev
->wb_file
)
820 if (page_index(req
->wb_page
) != page_index(prev
->wb_page
)+1)
823 if (req
->wb_offset
!= 0)
826 nfs_list_remove_request(req
);
827 nfs_list_add_request(req
, dst
);
829 if (req
->wb_offset
+ req
->wb_bytes
!= PAGE_CACHE_SIZE
)
831 if (pages
>= maxpages
)
838 * Try to update any existing write request, or create one if there is none.
839 * In order to match, the request's credentials must match those of
840 * the calling process.
842 * Note: Should always be called with the Page Lock held!
844 static struct nfs_page
*
845 nfs_update_request(struct file
* file
, struct inode
*inode
, struct page
*page
,
846 unsigned int offset
, unsigned int bytes
)
848 struct nfs_page
*req
, *new = NULL
;
849 unsigned long rqend
, end
;
851 end
= offset
+ bytes
;
854 /* Loop over all inode entries and see if we find
855 * A request for the page we wish to update
857 spin_lock(&nfs_wreq_lock
);
858 req
= _nfs_find_request(inode
, page
);
860 if (!nfs_lock_request(req
)) {
861 spin_unlock(&nfs_wreq_lock
);
862 nfs_wait_on_request(req
);
863 nfs_release_request(req
);
866 spin_unlock(&nfs_wreq_lock
);
868 nfs_release_request(new);
874 nfs_lock_request(req
);
875 nfs_inode_add_request(inode
, req
);
876 spin_unlock(&nfs_wreq_lock
);
877 nfs_mark_request_dirty(req
);
880 spin_unlock(&nfs_wreq_lock
);
883 * If we're over the soft limit, flush out old requests
885 if (inode
->u
.nfs_i
.npages
>= MAX_REQUEST_SOFT
)
886 nfs_wb_file(inode
, file
);
887 new = nfs_create_request(file
, inode
, page
, offset
, bytes
);
889 return ERR_PTR(-ENOMEM
);
890 /* If the region is locked, adjust the timeout */
891 if (region_locked(inode
, new))
892 new->wb_timeout
= jiffies
+ NFS_WRITEBACK_LOCKDELAY
;
894 new->wb_timeout
= jiffies
+ NFS_WRITEBACK_DELAY
;
897 /* We have a request for our page.
898 * If the creds don't match, or the
899 * page addresses don't match,
900 * tell the caller to wait on the conflicting request.
903 rqend
= req
->wb_offset
+ req
->wb_bytes
;
904 if (req
->wb_file
!= file
905 || req
->wb_page
!= page
906 || !nfs_dirty_request(req
)
907 || offset
> rqend
|| end
< req
->wb_offset
) {
908 nfs_unlock_request(req
);
909 nfs_release_request(req
);
910 return ERR_PTR(-EBUSY
);
913 /* Okay, the request matches. Update the region */
914 if (offset
< req
->wb_offset
) {
915 req
->wb_offset
= offset
;
916 req
->wb_bytes
= rqend
- req
->wb_offset
;
920 req
->wb_bytes
= end
- req
->wb_offset
;
922 nfs_unlock_request(req
);
928 * This is the strategy routine for NFS.
929 * It is called by nfs_updatepage whenever the user wrote up to the end of a page.
932 * We always try to submit a set of requests in parallel so that the
933 * server's write code can gather writes. This is mainly for the benefit of NFSv2 servers.
936 * We never submit more requests than we think the remote can handle.
937 * For UDP sockets, we make sure we don't exceed the congestion window;
938 * for TCP, we limit the number of requests to 8.
940 * NFS_STRATEGY_PAGES gives the minimum number of requests for NFSv2 that
941 * should be sent out in one go. This is for the benefit of NFSv2 servers
942 * that perform write gathering.
944 * FIXME: Different servers may have different sweet spots.
945 * Record the average congestion window in server struct?
947 #define NFS_STRATEGY_PAGES 8
949 nfs_strategy(struct inode
*inode
)
951 unsigned int dirty
, wpages
;
953 dirty
= inode
->u
.nfs_i
.ndirty
;
954 wpages
= NFS_SERVER(inode
)->wpages
;
956 if (NFS_PROTO(inode
)->version
== 2) {
957 if (dirty
>= NFS_STRATEGY_PAGES
* wpages
)
958 nfs_flush_file(inode
, NULL
, 0, 0, 0);
961 nfs_flush_file(inode
, NULL
, 0, 0, 0);
962 if (inode
->u
.nfs_i
.ncommit
> NFS_STRATEGY_PAGES
* wpages
&&
963 atomic_read(&nfs_nr_requests
) > MAX_REQUEST_SOFT
)
964 nfs_commit_file(inode
, NULL
, 0, 0, 0);
967 if (dirty
>= NFS_STRATEGY_PAGES
* wpages
)
968 nfs_flush_file(inode
, NULL
, 0, 0, 0);
971 * If we're running out of free requests, flush out everything
972 * in order to reduce memory usage...
974 if (inode
->u
.nfs_i
.npages
> MAX_REQUEST_SOFT
)
979 nfs_flush_incompatible(struct file
*file
, struct page
*page
)
981 struct inode
*inode
= file
->f_dentry
->d_inode
;
982 struct nfs_page
*req
;
985 * Look for a request corresponding to this page. If there
986 * is one, and it belongs to another file, we flush it out
987 * before we try to copy anything into the page. Do this
988 * due to the lack of an ACCESS-type call in NFSv2.
989 * Also do the same if we find a request from an existing
992 req
= nfs_find_request(inode
,page
);
994 if (req
->wb_file
!= file
|| req
->wb_page
!= page
)
995 status
= nfs_wb_page(inode
, page
);
996 nfs_release_request(req
);
998 return (status
< 0) ? status
: 0;
1002 * Update and possibly write a cached page of an NFS file.
1004 * XXX: Keep an eye on generic_file_read to make sure it doesn't do bad
1005 * things with a page scheduled for an RPC call (e.g. invalidate it).
1008 nfs_updatepage(struct file
*file
, struct page
*page
, unsigned int offset
, unsigned int count
)
1010 struct dentry
*dentry
= file
->f_dentry
;
1011 struct inode
*inode
= dentry
->d_inode
;
1012 struct nfs_page
*req
;
1013 int synchronous
= file
->f_flags
& O_SYNC
;
1016 dprintk("NFS: nfs_updatepage(%s/%s %d@%Ld)\n",
1017 dentry
->d_parent
->d_name
.name
, dentry
->d_name
.name
,
1018 count
, (long long)(page_offset(page
) +offset
));
1021 * If wsize is smaller than page size, update and write
1022 * page synchronously.
1024 if (NFS_SERVER(inode
)->wsize
< PAGE_SIZE
)
1025 return nfs_writepage_sync(file
, inode
, page
, offset
, count
);
1028 * Try to find an NFS request corresponding to this page
1030 * If the existing request cannot be updated, we must flush it out and retry.
1034 req
= nfs_update_request(file
, inode
, page
, offset
, count
);
1035 status
= (IS_ERR(req
)) ? PTR_ERR(req
) : 0;
1036 if (status
!= -EBUSY
)
1038 /* Request could not be updated. Flush it out and try again */
1039 status
= nfs_wb_page(inode
, page
);
1040 } while (status
>= 0);
1044 if (req
->wb_bytes
== PAGE_CACHE_SIZE
)
1045 SetPageUptodate(page
);
1051 error
= nfs_sync_file(inode
, file
, page_index(page
), 1, FLUSH_SYNC
|FLUSH_STABLE
);
1052 if (error
< 0 || (error
= file
->f_error
) < 0)
1056 /* If we wrote past the end of the page.
1057 * Call the strategy routine so it can send out a bunch of requests.
1060 if (req
->wb_offset
== 0 && req
->wb_bytes
== PAGE_CACHE_SIZE
)
1061 nfs_strategy(inode
);
1063 nfs_release_request(req
);
1065 dprintk("NFS: nfs_updatepage returns %d (isize %Ld)\n",
1066 status
, (long long)inode
->i_size
);
1068 ClearPageUptodate(page
);
1073 * Set up the argument/result storage required for the RPC call.
1076 nfs_write_rpcsetup(struct list_head
*head
, struct nfs_write_data
*data
)
1078 struct nfs_page
*req
;
1082 /* Set up the RPC argument and reply structs
1083 * NB: take care not to mess about with data->commit et al. */
1085 iov
= data
->args
.iov
;
1087 while (!list_empty(head
)) {
1088 struct nfs_page
*req
= nfs_list_entry(head
->next
);
1089 nfs_list_remove_request(req
);
1090 nfs_list_add_request(req
, &data
->pages
);
1091 iov
->iov_base
= kmap(req
->wb_page
) + req
->wb_offset
;
1092 iov
->iov_len
= req
->wb_bytes
;
1093 count
+= req
->wb_bytes
;
1097 req
= nfs_list_entry(data
->pages
.next
);
1098 data
->inode
= req
->wb_inode
;
1099 data
->cred
= req
->wb_cred
;
1100 data
->args
.fh
= NFS_FH(req
->wb_inode
);
1101 data
->args
.offset
= page_offset(req
->wb_page
) + req
->wb_offset
;
1102 data
->args
.count
= count
;
1103 data
->res
.fattr
= &data
->fattr
;
1104 data
->res
.count
= count
;
1105 data
->res
.verf
= &data
->verf
;
1110 * Create an RPC task for the given write request and kick it.
1111 * The page must have been locked by the caller.
1113 * It may happen that the page we're passed is not marked dirty.
1114 * This is the case if nfs_updatepage detects a conflicting request
1115 * that has been written but not committed.
1118 nfs_flush_one(struct list_head
*head
, struct inode
*inode
, int how
)
1120 struct rpc_clnt
*clnt
= NFS_CLIENT(inode
);
1121 struct nfs_write_data
*data
;
1122 struct rpc_task
*task
;
1123 struct rpc_message msg
;
1125 async
= !(how
& FLUSH_SYNC
),
1126 stable
= (how
& FLUSH_STABLE
);
1130 data
= nfs_writedata_alloc();
1135 /* Set the initial flags for the task. */
1136 flags
= (async
) ? RPC_TASK_ASYNC
: 0;
1138 /* Set up the argument struct */
1139 nfs_write_rpcsetup(head
, data
);
1141 if (!inode
->u
.nfs_i
.ncommit
)
1142 data
->args
.stable
= NFS_FILE_SYNC
;
1144 data
->args
.stable
= NFS_DATA_SYNC
;
1146 data
->args
.stable
= NFS_UNSTABLE
;
1148 /* Finalize the task. */
1149 rpc_init_task(task
, clnt
, nfs_writeback_done
, flags
);
1150 task
->tk_calldata
= data
;
1151 /* Release requests */
1152 task
->tk_release
= nfs_writedata_release
;
1154 #ifdef CONFIG_NFS_V3
1155 msg
.rpc_proc
= (NFS_PROTO(inode
)->version
== 3) ? NFS3PROC_WRITE
: NFSPROC_WRITE
;
1157 msg
.rpc_proc
= NFSPROC_WRITE
;
1159 msg
.rpc_argp
= &data
->args
;
1160 msg
.rpc_resp
= &data
->res
;
1161 msg
.rpc_cred
= data
->cred
;
1163 dprintk("NFS: %4d initiated write call (req %x/%Ld count %d nriov %d)\n",
1166 (long long)NFS_FILEID(inode
),
1167 data
->args
.count
, data
->args
.nriov
);
1169 rpc_clnt_sigmask(clnt
, &oldset
);
1170 rpc_call_setup(task
, &msg
, 0);
1172 rpc_clnt_sigunmask(clnt
, &oldset
);
1175 while (!list_empty(head
)) {
1176 struct nfs_page
*req
= nfs_list_entry(head
->next
);
1177 nfs_list_remove_request(req
);
1178 nfs_mark_request_dirty(req
);
1179 nfs_unlock_request(req
);
1185 nfs_flush_list(struct inode
*inode
, struct list_head
*head
, int how
)
1187 LIST_HEAD(one_request
);
1188 struct nfs_page
*req
;
1190 unsigned int pages
= 0,
1191 wpages
= NFS_SERVER(inode
)->wpages
;
1193 while (!list_empty(head
)) {
1194 pages
+= nfs_coalesce_requests(head
, &one_request
, wpages
);
1195 req
= nfs_list_entry(one_request
.next
);
1196 error
= nfs_flush_one(&one_request
, req
->wb_inode
, how
);
1203 while (!list_empty(head
)) {
1204 req
= nfs_list_entry(head
->next
);
1205 nfs_list_remove_request(req
);
1206 nfs_mark_request_dirty(req
);
1207 nfs_unlock_request(req
);
1214 * This function is called when the WRITE call is complete.
1217 nfs_writeback_done(struct rpc_task
*task
)
1219 struct nfs_write_data
*data
= (struct nfs_write_data
*) task
->tk_calldata
;
1220 struct nfs_writeargs
*argp
= &data
->args
;
1221 struct nfs_writeres
*resp
= &data
->res
;
1222 struct inode
*inode
= data
->inode
;
1223 struct nfs_page
*req
;
1226 dprintk("NFS: %4d nfs_writeback_done (status %d)\n",
1227 task
->tk_pid
, task
->tk_status
);
1229 /* We can't handle that yet but we check for it nevertheless */
1230 if (resp
->count
< argp
->count
&& task
->tk_status
>= 0) {
1231 static unsigned long complain
;
1232 if (time_before(complain
, jiffies
)) {
1234 "NFS: Server wrote less than requested.\n");
1235 complain
= jiffies
+ 300 * HZ
;
1237 /* Can't do anything about it right now except throw an error. */
1239 task
->tk_status
= -EIO
;
1241 #ifdef CONFIG_NFS_V3
1242 if (resp
->verf
->committed
< argp
->stable
&& task
->tk_status
>= 0) {
1243 /* We tried a write call, but the server did not
1244 * commit data to stable storage even though we requested it.
1246 * Note: There is a known bug in Tru64 < 5.0 in which
1247 * the server reports NFS_DATA_SYNC, but performs
1248 * NFS_FILE_SYNC. We therefore implement this checking
1249 * as a dprintk() in order to avoid filling syslog.
1251 static unsigned long complain
;
1253 if (time_before(complain
, jiffies
)) {
1254 dprintk("NFS: faulty NFSv3 server %s:"
1255 " (committed = %d) != (stable = %d)\n",
1256 NFS_SERVER(inode
)->hostname
,
1257 resp
->verf
->committed
, argp
->stable
);
1258 complain
= jiffies
+ 300 * HZ
;
1264 * Update attributes as result of writeback.
1265 * FIXME: There is an inherent race with invalidate_inode_pages and
1266 * writebacks since the page->count is kept > 1 for as long
1267 * as the page has a write request pending.
1269 nfs_write_attributes(inode
, resp
->fattr
);
1270 while (!list_empty(&data
->pages
)) {
1271 req
= nfs_list_entry(data
->pages
.next
);
1272 nfs_list_remove_request(req
);
1273 page
= req
->wb_page
;
1277 dprintk("NFS: write (%x/%Ld %d@%Ld)",
1278 req
->wb_inode
->i_dev
,
1279 (long long)NFS_FILEID(req
->wb_inode
),
1281 (long long)(page_offset(page
) + req
->wb_offset
));
1283 if (task
->tk_status
< 0) {
1284 ClearPageUptodate(page
);
1287 req
->wb_file
->f_error
= task
->tk_status
;
1288 nfs_inode_remove_request(req
);
1289 dprintk(", error = %d\n", task
->tk_status
);
1293 #ifdef CONFIG_NFS_V3
1294 if (resp
->verf
->committed
!= NFS_UNSTABLE
) {
1295 nfs_inode_remove_request(req
);
1299 memcpy(&req
->wb_verf
, resp
->verf
, sizeof(req
->wb_verf
));
1300 req
->wb_timeout
= jiffies
+ NFS_COMMIT_DELAY
;
1301 nfs_mark_request_commit(req
);
1302 dprintk(" marked for commit\n");
1304 nfs_inode_remove_request(req
);
1307 nfs_unlock_request(req
);
1312 #ifdef CONFIG_NFS_V3
/*
 * Set up the argument/result storage required for the RPC call.
 */
/*
 * nfs_commit_rpcsetup - prepare @data for a COMMIT call covering the
 * requests queued on @head.
 *
 * @head: list of requests; its entries are spliced onto data->pages and
 *        @head is then re-initialised with INIT_LIST_HEAD().
 * @data: call descriptor whose inode, cred, args and res fields are set.
 *
 * NOTE(review): the original line numbers embedded in this listing have
 * gaps (e.g. 1316, 1318, 1338, 1341), so the return type, the braces and
 * at least one statement are not visible here -- confirm against the
 * complete file before relying on this description.
 */
1317 nfs_commit_rpcsetup(struct list_head
*head
, struct nfs_write_data
*data
)
1319 struct nfs_page
*first
, *last
;
1320 struct inode
*inode
;
1321 loff_t start
, end
, len
;
1323 /* Set up the RPC argument and reply structs
1324 * NB: take care not to mess about with data->commit et al. */
1326 list_splice(head
, &data
->pages
);
1327 INIT_LIST_HEAD(head
);
/* The first and last entries of data->pages bound the byte range below. */
1328 first
= nfs_list_entry(data
->pages
.next
);
1329 last
= nfs_list_entry(data
->pages
.prev
);
1330 inode
= first
->wb_inode
;
1333 * Determine the offset range of requests in the COMMIT call.
1334 * We rely on the fact that data->pages is an ordered list...
/* start: file offset of the first request's data;
 * end: file offset just past the last request's wb_bytes. */
1336 start
= page_offset(first
->wb_page
) + first
->wb_offset
;
1337 end
= page_offset(last
->wb_page
) + (last
->wb_offset
+ last
->wb_bytes
);
/* NOTE(review): 'len' is tested and stored below, but neither an
 * assignment to it nor the body of the following 'if' is visible in this
 * listing; presumably len = end - start and the 'if' body zeroes it --
 * TODO confirm. */
1339 /* If 'len' is not a 32-bit quantity, pass '0' in the COMMIT call */
1340 if (end
>= inode
->i_size
|| len
< 0 || len
> (~((u32
)0) >> 1))
/* Credentials come from the first request, the file handle from its inode. */
1343 data
->inode
= inode
;
1344 data
->cred
= first
->wb_cred
;
1345 data
->args
.fh
= NFS_FH(inode
);
1346 data
->args
.offset
= start
;
/* args.count and res.count both receive len cast to u32. */
1347 data
->res
.count
= data
->args
.count
= (u32
)len
;
/* Point the result at the fattr/verf storage embedded in data. */
1348 data
->res
.fattr
= &data
->fattr
;
1349 data
->res
.verf
= &data
->verf
;
/*
 * Commit dirty pages
 */
/*
 * nfs_commit_list - set up an NFS3PROC_COMMIT RPC for the requests on @head.
 *
 * @head: list of requests to commit; handed to nfs_commit_rpcsetup().
 * @how:  flush flags; the task is asynchronous unless FLUSH_SYNC is set.
 *
 * NOTE(review): the embedded original line numbers have gaps, so several
 * things are not visible in this listing: the declarations of 'async',
 * 'flags' and 'oldset', any assignment to 'task', the check of the
 * nfs_writedata_alloc() result, original line 1393 (between
 * rpc_call_setup() and rpc_clnt_sigunmask()), and every return statement.
 * Confirm against the complete file.
 */
1356 nfs_commit_list(struct list_head
*head
, int how
)
1358 struct rpc_message msg
;
1359 struct rpc_clnt
*clnt
;
1360 struct nfs_write_data
*data
;
1361 struct rpc_task
*task
;
1362 struct nfs_page
*req
;
/* Asynchronous unless the caller passed FLUSH_SYNC. */
1364 async
= !(how
& FLUSH_SYNC
);
1367 data
= nfs_writedata_alloc();
1373 flags
= (async
) ? RPC_TASK_ASYNC
: 0;
1375 /* Set up the argument struct */
1376 nfs_commit_rpcsetup(head
, data
);
/* The RPC client is looked up through the inode of the first request
 * now sitting on data->pages. */
1377 req
= nfs_list_entry(data
->pages
.next
);
1378 clnt
= NFS_CLIENT(req
->wb_inode
);
/* nfs_commit_done is handed to rpc_init_task(); it later retrieves
 * 'data' through task->tk_calldata. */
1380 rpc_init_task(task
, clnt
, nfs_commit_done
, flags
);
1381 task
->tk_calldata
= data
;
1382 /* Release requests */
1383 task
->tk_release
= nfs_writedata_release
;
/* Describe the COMMIT procedure, its argument/result buffers and the
 * credentials chosen by nfs_commit_rpcsetup(). */
1385 msg
.rpc_proc
= NFS3PROC_COMMIT
;
1386 msg
.rpc_argp
= &data
->args
;
1387 msg
.rpc_resp
= &data
->res
;
1388 msg
.rpc_cred
= data
->cred
;
1390 dprintk("NFS: %4d initiated commit call\n", task
->tk_pid
);
/* The call setup is bracketed by rpc_clnt_sigmask()/rpc_clnt_sigunmask(). */
1391 rpc_clnt_sigmask(clnt
, &oldset
);
1392 rpc_call_setup(task
, &msg
, 0);
1394 rpc_clnt_sigunmask(clnt
, &oldset
);
/* NOTE(review): presumably the allocation-failure path (its label is not
 * visible): every request still on @head is taken off the list, marked
 * for commit again and unlocked -- TODO confirm. */
1397 while (!list_empty(head
)) {
1398 req
= nfs_list_entry(head
->next
);
1399 nfs_list_remove_request(req
);
1400 nfs_mark_request_commit(req
);
1401 nfs_unlock_request(req
);
/*
 * COMMIT call returned
 */
/*
 * nfs_commit_done - handle the result of a COMMIT call.
 *
 * @task: the RPC task; task->tk_calldata is the struct nfs_write_data
 *        describing the call.
 *
 * resp->fattr is passed to nfs_write_attributes(), then each request on
 * data->pages is taken off the list and handled according to the result:
 *   - task->tk_status < 0: the status is stored in req->wb_file->f_error
 *     and the request is removed from the inode;
 *   - the verifier saved in the request equals data->verf.verifier: the
 *     request is removed from the inode;
 *   - otherwise the request is marked dirty so the page is written again.
 * nfs_unlock_request() is the last visible call in the loop body.
 *
 * NOTE(review): the embedded original line numbers have gaps. The jumps
 * and label that keep the three outcomes apart are not visible, and the
 * "commit" dprintk format has four conversions while only three
 * arguments appear (original line 1428 is absent). Confirm against the
 * complete file.
 */
1410 nfs_commit_done(struct rpc_task
*task
)
1412 struct nfs_write_data
*data
= (struct nfs_write_data
*)task
->tk_calldata
;
1413 struct nfs_writeres
*resp
= &data
->res
;
1414 struct nfs_page
*req
;
1415 struct inode
*inode
= data
->inode
;
1417 dprintk("NFS: %4d nfs_commit_done (status %d)\n",
1418 task
->tk_pid
, task
->tk_status
);
1420 nfs_write_attributes(inode
, resp
->fattr
);
/* Drain data->pages one request at a time. */
1421 while (!list_empty(&data
->pages
)) {
1422 req
= nfs_list_entry(data
->pages
.next
);
1423 nfs_list_remove_request(req
);
1425 dprintk("NFS: commit (%x/%Ld %d@%Ld)",
1426 req
->wb_inode
->i_dev
,
1427 (long long)NFS_FILEID(req
->wb_inode
),
1429 (long long)(page_offset(req
->wb_page
) + req
->wb_offset
));
/* RPC failed: record the error on the file and drop the request. */
1430 if (task
->tk_status
< 0) {
1432 req
->wb_file
->f_error
= task
->tk_status
;
1433 nfs_inode_remove_request(req
);
1434 dprintk(", error = %d\n", task
->tk_status
);
1438 /* Okay, COMMIT succeeded, apparently. Check the verifier
1439 * returned by the server against all stored verfs. */
1440 if (!memcmp(req
->wb_verf
.verifier
, data
->verf
.verifier
, sizeof(data
->verf
.verifier
))) {
1441 /* We have a match */
1442 nfs_inode_remove_request(req
);
1446 /* We have a mismatch. Write the page again */
1447 dprintk(" mismatch\n");
1448 nfs_mark_request_dirty(req
);
1450 nfs_unlock_request(req
);
/*
 * nfs_flush_file - collect dirty requests for @inode / @file and flush them.
 *
 * nfs_scan_dirty() is called with a local list 'head' plus @file,
 * @idx_start and @npages, and its result is kept in 'res'; the list is
 * then passed to nfs_flush_list() together with @how, whose result is
 * kept in 'error'.
 *
 * @idx_start, @npages: presumably a starting page index and a page count
 *                      -- TODO confirm against nfs_scan_dirty().
 *
 * NOTE(review): original lines 1457-1461, 1463 and 1465 onward are absent
 * from this listing, so the declarations, any locking, the condition
 * guarding the flush and the return value cannot be described from here.
 */
1455 int nfs_flush_file(struct inode
*inode
, struct file
*file
, unsigned long idx_start
,
1456 unsigned int npages
, int how
)
1462 res
= nfs_scan_dirty(inode
, &head
, file
, idx_start
, npages
);
1464 error
= nfs_flush_list(inode
, &head
, how
);
/*
 * nfs_flush_timeout - flush the requests selected by
 * nfs_scan_dirty_timeout() for @inode.
 *
 * The scan result is kept in 'pages'; the local list 'head' it filled is
 * then passed to nfs_flush_list() together with @how, whose result is
 * kept in 'error'.
 *
 * NOTE(review): original lines 1471-1475, 1477 and 1479 onward are absent
 * from this listing, so the declarations, any locking, the condition
 * guarding the flush and the return value cannot be described from here.
 */
1470 int nfs_flush_timeout(struct inode
*inode
, int how
)
1476 pages
= nfs_scan_dirty_timeout(inode
, &head
);
1478 error
= nfs_flush_list(inode
, &head
, how
);
1484 #ifdef CONFIG_NFS_V3
/*
 * nfs_commit_file - collect requests awaiting COMMIT for @inode / @file
 * and hand them to nfs_commit_list().
 *
 * nfs_scan_commit() is called with a local list 'head' plus @file,
 * @idx_start and @npages, and its result is kept in 'res'; the list is
 * then passed to nfs_commit_list() together with @how, whose result is
 * kept in 'error'. Only built when CONFIG_NFS_V3 is defined.
 *
 * @idx_start, @npages: presumably a starting page index and a page count
 *                      -- TODO confirm against nfs_scan_commit().
 *
 * NOTE(review): original lines 1487-1491, 1493 and 1495 onward are absent
 * from this listing, so the declarations, any locking, the condition
 * guarding the commit and the return value cannot be described from here.
 */
1485 int nfs_commit_file(struct inode
*inode
, struct file
*file
, unsigned long idx_start
,
1486 unsigned int npages
, int how
)
1492 res
= nfs_scan_commit(inode
, &head
, file
, idx_start
, npages
);
1494 error
= nfs_commit_list(&head
, how
);
/*
 * nfs_commit_timeout - commit the requests selected by
 * nfs_scan_commit_timeout() for @inode.
 *
 * The scan result is kept in 'pages'. A second scan, nfs_scan_commit()
 * with a NULL file and 0/0 for the range arguments, adds its result to
 * 'pages' and uses the same local list 'head'. The list is then passed to
 * nfs_commit_list() together with @how.
 *
 * NOTE(review): original lines 1501-1505, 1507 and 1510 onward are absent
 * from this listing, so the declarations, any locking, the condition
 * under which the second scan and the commit run, and the return value
 * cannot be described from here.
 */
1500 int nfs_commit_timeout(struct inode
*inode
, int how
)
1506 pages
= nfs_scan_commit_timeout(inode
, &head
);
1508 pages
+= nfs_scan_commit(inode
, &head
, NULL
, 0, 0);
1509 error
= nfs_commit_list(&head
, how
);
/*
 * nfs_sync_file - wait for, flush and (with CONFIG_NFS_V3) commit the
 * requests of @inode / @file.
 *
 * @idx_start, @npages: forwarded unchanged to nfs_wait_on_requests(),
 *                      nfs_flush_file() and nfs_commit_file(); presumably
 *                      a starting page index and a page count -- TODO
 *                      confirm.
 * @how: flush flags; the FLUSH_WAIT bit is sampled into 'wait', and @how
 *       is forwarded to the flush and commit steps.
 *
 * Each visible step stores its result in 'error', and the closing
 * "while (error > 0)" repeats the sequence while that value is positive.
 *
 * NOTE(review): the embedded original line numbers have gaps, so the
 * declarations, the conditions guarding each step, the start of the loop
 * and the return statement are not visible in this listing.
 */
1517 int nfs_sync_file(struct inode
*inode
, struct file
*file
, unsigned long idx_start
,
1518 unsigned int npages
, int how
)
/* Remember whether the caller set FLUSH_WAIT in 'how'. */
1523 wait
= how
& FLUSH_WAIT
;
/* NOTE(review): the condition under which the inode is taken from the
 * file's dentry is not visible in this listing. */
1527 inode
= file
->f_dentry
->d_inode
;
1532 error
= nfs_wait_on_requests(inode
, file
, idx_start
, npages
);
1534 error
= nfs_flush_file(inode
, file
, idx_start
, npages
, how
);
1535 #ifdef CONFIG_NFS_V3
1537 error
= nfs_commit_file(inode
, file
, idx_start
, npages
, how
);
1539 } while (error
> 0);
/*
 * nfs_init_nfspagecache - create the two slab caches used by this file.
 *
 * "nfs_page" is sized for struct nfs_page and stored in nfs_page_cachep;
 * "nfs_write_data" is sized for struct nfs_write_data and stored in
 * nfs_wdata_cachep. Both are created with SLAB_HWCACHE_ALIGN, and each
 * result is compared against NULL.
 *
 * NOTE(review): original lines 1548, 1550, 1555 and 1557 onward are
 * absent from this listing, so the trailing kmem_cache_create()
 * arguments, the action taken on a NULL result and the return value are
 * not visible -- confirm against the complete file.
 */
1543 int nfs_init_nfspagecache(void)
1545 nfs_page_cachep
= kmem_cache_create("nfs_page",
1546 sizeof(struct nfs_page
),
1547 0, SLAB_HWCACHE_ALIGN
,
1549 if (nfs_page_cachep
== NULL
)
1552 nfs_wdata_cachep
= kmem_cache_create("nfs_write_data",
1553 sizeof(struct nfs_write_data
),
1554 0, SLAB_HWCACHE_ALIGN
,
1556 if (nfs_wdata_cachep
== NULL
)
/*
 * nfs_destroy_nfspagecache - destroy the nfs_page and nfs_write_data slab
 * caches.
 *
 * kmem_cache_destroy() is called on nfs_page_cachep and then on
 * nfs_wdata_cachep; a non-zero result from either call is reported with
 * a KERN_INFO message saying not all structures were freed.
 *
 * NOTE(review): the braces of this function are not visible in this
 * listing; confirm nothing follows the second printk().
 */
1562 void nfs_destroy_nfspagecache(void)
1564 if (kmem_cache_destroy(nfs_page_cachep
))
1565 printk(KERN_INFO
"nfs_page: not all structures were freed\n");
1566 if (kmem_cache_destroy(nfs_wdata_cachep
))
1567 printk(KERN_INFO
"nfs_write_data: not all structures were freed\n");