4 * Writing file data over NFS.
6 * We do it like this: When a (user) process wishes to write data to an
7 * NFS file, a write request is allocated that contains the RPC task data
8 * plus some info on the page to be written, and added to the inode's
9 * write chain. If the process writes past the end of the page, an async
10 * RPC call to write the page is scheduled immediately; otherwise, the call
11 * is delayed for a few seconds.
13 * Just like readahead, no async I/O is performed if wsize < PAGE_SIZE.
15 * Write requests are kept on the inode's writeback list. Each entry in
16 * that list references the page (portion) to be written. When the
17 * cache timeout has expired, the RPC task is woken up, and tries to
18 * lock the page. As soon as it manages to do so, the request is moved
19 * from the writeback list to the writelock list.
21 * Note: we must make sure never to confuse the inode passed in the
22 * write_page request with the one in page->inode. As far as I understand
23 * it, these are different when doing a swap-out.
25 * To understand everything that goes on here and in the NFS read code,
26 * one should be aware that a page is locked in exactly one of the following
29 * - A write request is in progress.
30 * - A user process is in generic_file_write/nfs_update_page
31 * - A user process is in generic_file_read
33 * Also note that because of the way pages are invalidated in
34 * nfs_revalidate_inode, the following assertions hold:
36 * - If a page is dirty, there will be no read requests (a page will
37 * not be re-read unless invalidated by nfs_revalidate_inode).
38 * - If the page is not uptodate, there will be no pending write
39 * requests, and no process will be in nfs_update_page.
41 * FIXME: Interaction with the vmscan routines is not optimal yet.
42 * Either vmscan must be made nfs-savvy, or we need a different page
43 * reclaim concept that supports something like FS-independent
44 * buffer_heads with a b_ops-> field.
46 * Copyright (C) 1996, 1997, Olaf Kirch <okir@monad.swb.de>
49 #include <linux/config.h>
50 #include <linux/types.h>
51 #include <linux/malloc.h>
52 #include <linux/swap.h>
53 #include <linux/pagemap.h>
54 #include <linux/file.h>
56 #include <linux/sunrpc/clnt.h>
57 #include <linux/nfs_fs.h>
58 #include <linux/nfs_mount.h>
59 #include <linux/nfs_flushd.h>
60 #include <linux/nfs_page.h>
61 #include <asm/uaccess.h>
62 #include <linux/smp_lock.h>
64 #define NFS_PARANOIA 1
65 #define NFSDBG_FACILITY NFSDBG_PAGECACHE
70 spinlock_t nfs_wreq_lock
= SPIN_LOCK_UNLOCKED
;
71 static atomic_t nfs_nr_requests
= ATOMIC_INIT(0);
76 * This is the struct where the WRITE/COMMIT arguments go.
78 struct nfs_write_data
{
81 struct rpc_cred
*cred
;
82 struct nfs_writeargs args
; /* argument struct */
83 struct nfs_writeres res
; /* result struct */
84 struct nfs_fattr fattr
;
85 struct nfs_writeverf verf
;
86 struct list_head pages
; /* Coalesced requests we wish to flush */
90 * Local function declarations
92 static struct nfs_page
* nfs_update_request(struct file
*, struct inode
*,
94 unsigned int, unsigned int);
95 static void nfs_strategy(struct inode
*inode
);
96 static void nfs_writeback_done(struct rpc_task
*);
98 static void nfs_commit_done(struct rpc_task
*);
101 /* Hack for future NFS swap support */
103 # define IS_SWAPFILE(inode) (0)
106 static kmem_cache_t
*nfs_page_cachep
;
107 static kmem_cache_t
*nfs_wdata_cachep
;
109 static __inline__
struct nfs_page
*nfs_page_alloc(void)
112 p
= kmem_cache_alloc(nfs_page_cachep
, SLAB_KERNEL
);
114 memset(p
, 0, sizeof(*p
));
115 INIT_LIST_HEAD(&p
->wb_hash
);
116 INIT_LIST_HEAD(&p
->wb_list
);
117 init_waitqueue_head(&p
->wb_wait
);
122 static __inline__
void nfs_page_free(struct nfs_page
*p
)
124 kmem_cache_free(nfs_page_cachep
, p
);
127 static __inline__
struct nfs_write_data
*nfs_writedata_alloc(void)
129 struct nfs_write_data
*p
;
130 p
= kmem_cache_alloc(nfs_wdata_cachep
, SLAB_NFS
);
132 memset(p
, 0, sizeof(*p
));
133 INIT_LIST_HEAD(&p
->pages
);
138 static __inline__
void nfs_writedata_free(struct nfs_write_data
*p
)
140 kmem_cache_free(nfs_wdata_cachep
, p
);
143 static void nfs_writedata_release(struct rpc_task
*task
)
145 struct nfs_write_data
*wdata
= (struct nfs_write_data
*)task
->tk_calldata
;
146 nfs_writedata_free(wdata
);
150 * This function will be used to simulate weak cache consistency
151 * under NFSv2 when the NFSv3 attribute patch is included.
152 * For the moment, we just call nfs_refresh_inode().
154 static __inline__
int
155 nfs_write_attributes(struct inode
*inode
, struct nfs_fattr
*fattr
)
157 if ((fattr
->valid
& NFS_ATTR_FATTR
) && !(fattr
->valid
& NFS_ATTR_WCC
)) {
158 fattr
->pre_size
= NFS_CACHE_ISIZE(inode
);
159 fattr
->pre_mtime
= NFS_CACHE_MTIME(inode
);
160 fattr
->pre_ctime
= NFS_CACHE_CTIME(inode
);
161 fattr
->valid
|= NFS_ATTR_WCC
;
163 return nfs_refresh_inode(inode
, fattr
);
167 * Write a page synchronously.
168 * Offset is the data offset within the page.
171 nfs_writepage_sync(struct file
*file
, struct inode
*inode
, struct page
*page
,
172 unsigned int offset
, unsigned int count
)
174 struct rpc_cred
*cred
= NULL
;
176 unsigned int wsize
= NFS_SERVER(inode
)->wsize
;
177 int result
, refresh
= 0, written
= 0, flags
;
179 struct nfs_fattr fattr
;
180 struct nfs_writeverf verf
;
184 cred
= nfs_file_cred(file
);
187 dprintk("NFS: nfs_writepage_sync(%x/%Ld %d@%Ld)\n",
188 inode
->i_dev
, (long long)NFS_FILEID(inode
),
189 count
, (long long)(page_offset(page
) + offset
));
191 buffer
= kmap(page
) + offset
;
192 base
= page_offset(page
) + offset
;
194 flags
= ((IS_SWAPFILE(inode
)) ? NFS_RW_SWAP
: 0) | NFS_RW_SYNC
;
197 if (count
< wsize
&& !IS_SWAPFILE(inode
))
200 result
= NFS_PROTO(inode
)->write(inode
, cred
, &fattr
, flags
,
201 base
, wsize
, buffer
, &verf
);
202 nfs_write_attributes(inode
, &fattr
);
205 /* Must mark the page invalid after I/O error */
206 ClearPageUptodate(page
);
210 printk("NFS: short write, wsize=%u, result=%d\n",
218 * If we've extended the file, update the inode
219 * now so we don't invalidate the cache.
221 if (base
> inode
->i_size
)
222 inode
->i_size
= base
;
226 ClearPageError(page
);
232 return written
? written
: result
;
236 nfs_writepage_async(struct file
*file
, struct inode
*inode
, struct page
*page
,
237 unsigned int offset
, unsigned int count
)
239 struct nfs_page
*req
;
242 req
= nfs_update_request(file
, inode
, page
, offset
, count
);
243 status
= (IS_ERR(req
)) ? PTR_ERR(req
) : 0;
246 nfs_release_request(req
);
253 * Write an mmapped page to the server.
256 nfs_writepage(struct page
*page
)
259 unsigned long end_index
;
260 unsigned offset
= PAGE_CACHE_SIZE
;
262 struct address_space
*mapping
= page
->mapping
;
266 inode
= (struct inode
*)mapping
->host
;
269 end_index
= inode
->i_size
>> PAGE_CACHE_SHIFT
;
271 /* Ensure we've flushed out any previous writes */
272 nfs_wb_page(inode
,page
);
275 if (page
->index
< end_index
)
277 /* things got complicated... */
278 offset
= inode
->i_size
& (PAGE_CACHE_SIZE
-1);
280 /* OK, are we completely out? */
282 if (page
->index
>= end_index
+1 || !offset
)
285 if (!PageError(page
) && NFS_SERVER(inode
)->rsize
>= PAGE_CACHE_SIZE
) {
286 err
= nfs_writepage_async(NULL
, inode
, page
, 0, offset
);
290 err
= nfs_writepage_sync(NULL
, inode
, page
, 0, offset
);
291 if ( err
== offset
) {
301 * Check whether the file range we want to write to is locked by
305 region_locked(struct inode
*inode
, struct nfs_page
*req
)
307 struct file_lock
*fl
;
308 loff_t rqstart
, rqend
;
310 /* Don't optimize writes if we don't use NLM */
311 if (NFS_SERVER(inode
)->flags
& NFS_MOUNT_NONLM
)
314 rqstart
= page_offset(req
->wb_page
) + req
->wb_offset
;
315 rqend
= rqstart
+ req
->wb_bytes
;
316 for (fl
= inode
->i_flock
; fl
; fl
= fl
->fl_next
) {
317 if (fl
->fl_owner
== current
->files
&& (fl
->fl_flags
& FL_POSIX
)
318 && fl
->fl_type
== F_WRLCK
319 && fl
->fl_start
<= rqstart
&& rqend
<= fl
->fl_end
) {
328 * Insert a write request into an inode
331 nfs_inode_add_request(struct inode
*inode
, struct nfs_page
*req
)
333 if (!list_empty(&req
->wb_hash
))
335 if (!NFS_WBACK_BUSY(req
))
336 printk(KERN_ERR
"NFS: unlocked request attempted hashed!\n");
337 if (list_empty(&inode
->u
.nfs_i
.writeback
))
338 atomic_inc(&inode
->i_count
);
339 inode
->u
.nfs_i
.npages
++;
340 list_add(&req
->wb_hash
, &inode
->u
.nfs_i
.writeback
);
345 * Insert a write request into an inode
348 nfs_inode_remove_request(struct nfs_page
*req
)
351 spin_lock(&nfs_wreq_lock
);
352 if (list_empty(&req
->wb_hash
)) {
353 spin_unlock(&nfs_wreq_lock
);
356 if (!NFS_WBACK_BUSY(req
))
357 printk(KERN_ERR
"NFS: unlocked request attempted unhashed!\n");
358 inode
= req
->wb_inode
;
359 list_del(&req
->wb_hash
);
360 INIT_LIST_HEAD(&req
->wb_hash
);
361 inode
->u
.nfs_i
.npages
--;
362 if ((inode
->u
.nfs_i
.npages
== 0) != list_empty(&inode
->u
.nfs_i
.writeback
))
363 printk(KERN_ERR
"NFS: desynchronized value of nfs_i.npages.\n");
364 if (list_empty(&inode
->u
.nfs_i
.writeback
))
366 if (!nfs_have_writebacks(inode
) && !nfs_have_read(inode
))
367 inode_remove_flushd(inode
);
368 spin_unlock(&nfs_wreq_lock
);
369 nfs_release_request(req
);
375 static inline struct nfs_page
*
376 _nfs_find_request(struct inode
*inode
, struct page
*page
)
378 struct list_head
*head
, *next
;
380 head
= &inode
->u
.nfs_i
.writeback
;
382 while (next
!= head
) {
383 struct nfs_page
*req
= nfs_inode_wb_entry(next
);
385 if (page_index(req
->wb_page
) != page_index(page
))
393 static struct nfs_page
*
394 nfs_find_request(struct inode
*inode
, struct page
*page
)
396 struct nfs_page
*req
;
398 spin_lock(&nfs_wreq_lock
);
399 req
= _nfs_find_request(inode
, page
);
400 spin_unlock(&nfs_wreq_lock
);
405 * Insert a write request into a sorted list
407 void nfs_list_add_request(struct nfs_page
*req
, struct list_head
*head
)
409 struct list_head
*prev
;
411 if (!list_empty(&req
->wb_list
)) {
412 printk(KERN_ERR
"NFS: Add to list failed!\n");
415 if (!NFS_WBACK_BUSY(req
))
416 printk(KERN_ERR
"NFS: unlocked request attempted added to list!\n");
418 while (prev
!= head
) {
419 struct nfs_page
*p
= nfs_list_entry(prev
);
420 if (page_index(p
->wb_page
) < page_index(req
->wb_page
))
424 list_add(&req
->wb_list
, prev
);
425 req
->wb_list_head
= head
;
429 * Insert a write request into an inode
431 void nfs_list_remove_request(struct nfs_page
*req
)
433 if (list_empty(&req
->wb_list
))
435 if (!NFS_WBACK_BUSY(req
))
436 printk(KERN_ERR
"NFS: unlocked request attempted removed from list!\n");
437 list_del(&req
->wb_list
);
438 INIT_LIST_HEAD(&req
->wb_list
);
439 req
->wb_list_head
= NULL
;
/*
 * NOTE(review): queue a request on the inode's dirty list (under
 * nfs_wreq_lock, only if not already on a list), bump the ndirty
 * counter, then schedule a flush-daemon scan at the request's
 * timeout.  The extraction this file came from dropped several
 * original lines (function braces, the closing brace of the if at
 * original line 454); code text below is kept byte-identical to the
 * damaged source -- confirm against upstream before compiling.
 */
443 * Add a request to the inode's dirty list.
446 nfs_mark_request_dirty(struct nfs_page
*req
)
448 struct inode
*inode
= req
->wb_inode
;
450 spin_lock(&nfs_wreq_lock
);
451 if (list_empty(&req
->wb_list
)) {
452 nfs_list_add_request(req
, &inode
->u
.nfs_i
.dirty
);
453 inode
->u
.nfs_i
.ndirty
++;
455 spin_unlock(&nfs_wreq_lock
);
457 * NB: the call to inode_schedule_scan() must lie outside the
458 * spinlock since it can run flushd().
460 inode_schedule_scan(inode
, req
->wb_timeout
);
/*
 * NOTE(review): predicate -- true iff the request sits on a list and
 * that list is this inode's dirty list (wb_list_head comparison).
 * The extraction dropped the function's storage class / return type
 * line and braces; code text below is kept byte-identical to the
 * damaged source -- confirm against upstream before compiling.
 */
464 * Check if a request is dirty
467 nfs_dirty_request(struct nfs_page
*req
)
469 struct inode
*inode
= req
->wb_inode
;
470 return !list_empty(&req
->wb_list
) && req
->wb_list_head
== &inode
->u
.nfs_i
.dirty
;
/*
 * NOTE(review): mirror of nfs_mark_request_dirty for the COMMIT path:
 * queue the request on the inode's commit list (under nfs_wreq_lock,
 * only if not already listed), bump ncommit, then schedule a
 * flush-daemon scan at the request's timeout.  The extraction dropped
 * several original lines (function braces, closing brace of the if at
 * original line 486); code text below is kept byte-identical to the
 * damaged source -- confirm against upstream before compiling.
 */
475 * Add a request to the inode's commit list.
478 nfs_mark_request_commit(struct nfs_page
*req
)
480 struct inode
*inode
= req
->wb_inode
;
482 spin_lock(&nfs_wreq_lock
);
483 if (list_empty(&req
->wb_list
)) {
484 nfs_list_add_request(req
, &inode
->u
.nfs_i
.commit
);
485 inode
->u
.nfs_i
.ncommit
++;
487 spin_unlock(&nfs_wreq_lock
);
489 * NB: the call to inode_schedule_scan() must lie outside the
490 * spinlock since it can run flushd().
492 inode_schedule_scan(inode
, req
->wb_timeout
);
497 * Create a write request.
498 * Page must be locked by the caller. This makes sure we never create
499 * two different requests for the same page, and avoids possible deadlock
500 * when we reach the hard limit on the number of dirty pages.
501 * It should be safe to sleep here.
503 struct nfs_page
*nfs_create_request(struct file
*file
, struct inode
*inode
,
505 unsigned int offset
, unsigned int count
)
507 struct nfs_reqlist
*cache
= NFS_REQUESTLIST(inode
);
508 struct nfs_page
*req
= NULL
;
511 /* Deal with hard/soft limits.
514 /* If we're over the global soft limit, wake up all requests */
515 if (atomic_read(&nfs_nr_requests
) >= MAX_REQUEST_SOFT
) {
516 dprintk("NFS: hit soft limit (%d requests)\n",
517 atomic_read(&nfs_nr_requests
));
519 nfs_reqlist_init(NFS_SERVER(inode
));
523 /* If we haven't reached the local hard limit yet,
524 * try to allocate the request struct */
525 if (atomic_read(&cache
->nr_requests
) < MAX_REQUEST_HARD
) {
526 req
= nfs_page_alloc();
531 /* We're over the hard limit. Wait for better times */
532 dprintk("NFS: create_request sleeping (total %d pid %d)\n",
533 atomic_read(&cache
->nr_requests
), current
->pid
);
536 if (NFS_SERVER(inode
)->flags
& NFS_MOUNT_INTR
) {
537 interruptible_sleep_on_timeout(&cache
->request_wait
,
542 sleep_on_timeout(&cache
->request_wait
, timeout
);
544 dprintk("NFS: create_request waking up (tot %d pid %d)\n",
545 atomic_read(&cache
->nr_requests
), current
->pid
);
550 /* Initialize the request struct. Initially, we assume a
551 * long write-back delay. This will be adjusted in
552 * update_nfs_request below if the region is not locked. */
554 page_cache_get(page
);
555 req
->wb_offset
= offset
;
556 req
->wb_bytes
= count
;
559 /* If we have a struct file, use its cached credentials
560 * else cache the current process' credentials. */
563 req
->wb_cred
= nfs_file_cred(file
);
565 req
->wb_cred
= rpcauth_lookupcred(NFS_CLIENT(inode
)->cl_auth
, 0);
566 req
->wb_inode
= inode
;
569 /* register request's existence */
570 atomic_inc(&cache
->nr_requests
);
571 atomic_inc(&nfs_nr_requests
);
577 * Release all resources associated with a write request after it
578 * has been committed to stable storage
580 * Note: Should always be called with the spinlock held!
583 nfs_release_request(struct nfs_page
*req
)
585 struct inode
*inode
= req
->wb_inode
;
586 struct nfs_reqlist
*cache
= NFS_REQUESTLIST(inode
);
587 struct page
*page
= req
->wb_page
;
589 spin_lock(&nfs_wreq_lock
);
590 if (--req
->wb_count
) {
591 spin_unlock(&nfs_wreq_lock
);
594 spin_unlock(&nfs_wreq_lock
);
596 if (!list_empty(&req
->wb_list
)) {
597 printk(KERN_ERR
"NFS: Request released while still on a list!\n");
598 nfs_list_remove_request(req
);
600 if (!list_empty(&req
->wb_hash
)) {
601 printk(KERN_ERR
"NFS: Request released while still hashed!\n");
602 nfs_inode_remove_request(req
);
604 if (NFS_WBACK_BUSY(req
))
605 printk(KERN_ERR
"NFS: Request released while still locked!\n");
607 /* Release struct file or cached credential */
611 rpcauth_releasecred(NFS_CLIENT(inode
)->cl_auth
, req
->wb_cred
);
612 page_cache_release(page
);
614 /* wake up anyone waiting to allocate a request */
615 atomic_dec(&cache
->nr_requests
);
616 atomic_dec(&nfs_nr_requests
);
617 wake_up(&cache
->request_wait
);
619 if (atomic_read(&cache
->nr_requests
) < 0)
621 if (atomic_read(&nfs_nr_requests
) < 0)
/*
 * NOTE(review): block (via nfs_wait_event on the RPC client, so
 * interruptible only on intr mounts) until NFS_WBACK_BUSY clears on
 * the request.  The extraction dropped the body of the
 * !NFS_WBACK_BUSY fast path (original line 638, presumably an early
 * "return 0;") and the braces; code text below is kept byte-identical
 * to the damaged source -- confirm against upstream before compiling.
 */
627 * Wait for a request to complete.
629 * Interruptible by signals only if mounted with intr flag.
632 nfs_wait_on_request(struct nfs_page
*req
)
634 struct inode
*inode
= req
->wb_inode
;
635 struct rpc_clnt
*clnt
= NFS_CLIENT(inode
);
637 if (!NFS_WBACK_BUSY(req
))
639 return nfs_wait_event(clnt
, req
->wb_wait
, !NFS_WBACK_BUSY(req
));
643 * Wait for a request to complete.
645 * Interruptible by signals only if mounted with intr flag.
648 nfs_wait_on_requests(struct inode
*inode
, struct file
*file
, unsigned long idx_start
, unsigned int npages
)
650 struct list_head
*p
, *head
;
651 unsigned long idx_end
;
652 unsigned int res
= 0;
658 idx_end
= idx_start
+ npages
- 1;
660 spin_lock(&nfs_wreq_lock
);
661 head
= &inode
->u
.nfs_i
.writeback
;
664 unsigned long pg_idx
;
665 struct nfs_page
*req
= nfs_inode_wb_entry(p
);
669 if (file
&& req
->wb_file
!= file
)
672 pg_idx
= page_index(req
->wb_page
);
673 if (pg_idx
< idx_start
|| pg_idx
> idx_end
)
676 if (!NFS_WBACK_BUSY(req
))
679 spin_unlock(&nfs_wreq_lock
);
680 error
= nfs_wait_on_request(req
);
681 nfs_release_request(req
);
684 spin_lock(&nfs_wreq_lock
);
688 spin_unlock(&nfs_wreq_lock
);
693 * Scan cluster for dirty pages and send as many of them to the
694 * server as possible.
696 int nfs_scan_list_timeout(struct list_head
*head
, struct list_head
*dst
, struct inode
*inode
)
699 struct nfs_page
*req
;
704 req
= nfs_list_entry(p
);
706 if (time_after(req
->wb_timeout
, jiffies
)) {
707 if (time_after(NFS_NEXTSCAN(inode
), req
->wb_timeout
))
708 NFS_NEXTSCAN(inode
) = req
->wb_timeout
;
711 if (!nfs_lock_request(req
))
713 nfs_list_remove_request(req
);
714 nfs_list_add_request(req
, dst
);
721 nfs_scan_dirty_timeout(struct inode
*inode
, struct list_head
*dst
)
724 spin_lock(&nfs_wreq_lock
);
725 pages
= nfs_scan_list_timeout(&inode
->u
.nfs_i
.dirty
, dst
, inode
);
726 inode
->u
.nfs_i
.ndirty
-= pages
;
727 if ((inode
->u
.nfs_i
.ndirty
== 0) != list_empty(&inode
->u
.nfs_i
.dirty
))
728 printk(KERN_ERR
"NFS: desynchronized value of nfs_i.ndirty.\n");
729 spin_unlock(&nfs_wreq_lock
);
735 nfs_scan_commit_timeout(struct inode
*inode
, struct list_head
*dst
)
738 spin_lock(&nfs_wreq_lock
);
739 pages
= nfs_scan_list_timeout(&inode
->u
.nfs_i
.commit
, dst
, inode
);
740 inode
->u
.nfs_i
.ncommit
-= pages
;
741 if ((inode
->u
.nfs_i
.ncommit
== 0) != list_empty(&inode
->u
.nfs_i
.commit
))
742 printk(KERN_ERR
"NFS: desynchronized value of nfs_i.ncommit.\n");
743 spin_unlock(&nfs_wreq_lock
);
748 int nfs_scan_list(struct list_head
*src
, struct list_head
*dst
, struct file
*file
, unsigned long idx_start
, unsigned int npages
)
751 struct nfs_page
*req
;
752 unsigned long idx_end
;
759 idx_end
= idx_start
+ npages
- 1;
762 unsigned long pg_idx
;
764 req
= nfs_list_entry(p
);
767 if (file
&& req
->wb_file
!= file
)
770 pg_idx
= page_index(req
->wb_page
);
771 if (pg_idx
< idx_start
|| pg_idx
> idx_end
)
774 if (!nfs_lock_request(req
))
776 nfs_list_remove_request(req
);
777 nfs_list_add_request(req
, dst
);
784 nfs_scan_dirty(struct inode
*inode
, struct list_head
*dst
, struct file
*file
, unsigned long idx_start
, unsigned int npages
)
787 spin_lock(&nfs_wreq_lock
);
788 res
= nfs_scan_list(&inode
->u
.nfs_i
.dirty
, dst
, file
, idx_start
, npages
);
789 inode
->u
.nfs_i
.ndirty
-= res
;
790 if ((inode
->u
.nfs_i
.ndirty
== 0) != list_empty(&inode
->u
.nfs_i
.dirty
))
791 printk(KERN_ERR
"NFS: desynchronized value of nfs_i.ndirty.\n");
792 spin_unlock(&nfs_wreq_lock
);
798 nfs_scan_commit(struct inode
*inode
, struct list_head
*dst
, struct file
*file
, unsigned long idx_start
, unsigned int npages
)
801 spin_lock(&nfs_wreq_lock
);
802 res
= nfs_scan_list(&inode
->u
.nfs_i
.commit
, dst
, file
, idx_start
, npages
);
803 inode
->u
.nfs_i
.ncommit
-= res
;
804 if ((inode
->u
.nfs_i
.ncommit
== 0) != list_empty(&inode
->u
.nfs_i
.commit
))
805 printk(KERN_ERR
"NFS: desynchronized value of nfs_i.ncommit.\n");
806 spin_unlock(&nfs_wreq_lock
);
812 int nfs_coalesce_requests(struct list_head
*src
, struct list_head
*dst
, unsigned int maxpages
)
814 struct nfs_page
*req
= NULL
;
815 unsigned int pages
= 0;
817 while (!list_empty(src
)) {
818 struct nfs_page
*prev
= req
;
820 req
= nfs_list_entry(src
->next
);
822 if (req
->wb_file
!= prev
->wb_file
)
824 if (page_index(req
->wb_page
) != page_index(prev
->wb_page
)+1)
827 if (req
->wb_offset
!= 0)
830 nfs_list_remove_request(req
);
831 nfs_list_add_request(req
, dst
);
833 if (req
->wb_offset
+ req
->wb_bytes
!= PAGE_CACHE_SIZE
)
835 if (pages
>= maxpages
)
842 * Try to update any existing write request, or create one if there is none.
843 * In order to match, the request's credentials must match those of
844 * the calling process.
846 * Note: Should always be called with the Page Lock held!
848 static struct nfs_page
*
849 nfs_update_request(struct file
* file
, struct inode
*inode
, struct page
*page
,
850 unsigned int offset
, unsigned int bytes
)
852 struct nfs_page
*req
, *new = NULL
;
853 unsigned long rqend
, end
;
855 end
= offset
+ bytes
;
858 /* Loop over all inode entries and see if we find
859 * A request for the page we wish to update
861 spin_lock(&nfs_wreq_lock
);
862 req
= _nfs_find_request(inode
, page
);
864 if (!nfs_lock_request(req
)) {
865 spin_unlock(&nfs_wreq_lock
);
866 nfs_wait_on_request(req
);
867 nfs_release_request(req
);
870 spin_unlock(&nfs_wreq_lock
);
872 nfs_release_request(new);
878 nfs_lock_request(req
);
879 nfs_inode_add_request(inode
, req
);
880 spin_unlock(&nfs_wreq_lock
);
881 nfs_mark_request_dirty(req
);
884 spin_unlock(&nfs_wreq_lock
);
887 * If we're over the soft limit, flush out old requests
889 if (inode
->u
.nfs_i
.npages
>= MAX_REQUEST_SOFT
)
890 nfs_wb_file(inode
, file
);
891 new = nfs_create_request(file
, inode
, page
, offset
, bytes
);
893 return ERR_PTR(-ENOMEM
);
894 /* If the region is locked, adjust the timeout */
895 if (region_locked(inode
, new))
896 new->wb_timeout
= jiffies
+ NFS_WRITEBACK_LOCKDELAY
;
898 new->wb_timeout
= jiffies
+ NFS_WRITEBACK_DELAY
;
901 /* We have a request for our page.
902 * If the creds don't match, or the
903 * page addresses don't match,
904 * tell the caller to wait on the conflicting
907 rqend
= req
->wb_offset
+ req
->wb_bytes
;
908 if (req
->wb_file
!= file
909 || req
->wb_page
!= page
910 || !nfs_dirty_request(req
)
911 || offset
> rqend
|| end
< req
->wb_offset
) {
912 nfs_unlock_request(req
);
913 nfs_release_request(req
);
914 return ERR_PTR(-EBUSY
);
917 /* Okay, the request matches. Update the region */
918 if (offset
< req
->wb_offset
) {
919 req
->wb_offset
= offset
;
920 req
->wb_bytes
= rqend
- req
->wb_offset
;
924 req
->wb_bytes
= end
- req
->wb_offset
;
926 nfs_unlock_request(req
);
932 * This is the strategy routine for NFS.
933 * It is called by nfs_updatepage whenever the user wrote up to the end
936 * We always try to submit a set of requests in parallel so that the
937 * server's write code can gather writes. This is mainly for the benefit
940 * We never submit more requests than we think the remote can handle.
941 * For UDP sockets, we make sure we don't exceed the congestion window;
942 * for TCP, we limit the number of requests to 8.
944 * NFS_STRATEGY_PAGES gives the minimum number of requests for NFSv2 that
945 * should be sent out in one go. This is for the benefit of NFSv2 servers
946 * that perform write gathering.
948 * FIXME: Different servers may have different sweet spots.
949 * Record the average congestion window in server struct?
951 #define NFS_STRATEGY_PAGES 8
953 nfs_strategy(struct inode
*inode
)
955 unsigned int dirty
, wpages
;
957 dirty
= inode
->u
.nfs_i
.ndirty
;
958 wpages
= NFS_SERVER(inode
)->wpages
;
960 if (NFS_PROTO(inode
)->version
== 2) {
961 if (dirty
>= NFS_STRATEGY_PAGES
* wpages
)
962 nfs_flush_file(inode
, NULL
, 0, 0, 0);
965 nfs_flush_file(inode
, NULL
, 0, 0, 0);
966 if (inode
->u
.nfs_i
.ncommit
> NFS_STRATEGY_PAGES
* wpages
&&
967 atomic_read(&nfs_nr_requests
) > MAX_REQUEST_SOFT
)
968 nfs_commit_file(inode
, NULL
, 0, 0, 0);
971 if (dirty
>= NFS_STRATEGY_PAGES
* wpages
)
972 nfs_flush_file(inode
, NULL
, 0, 0, 0);
975 * If we're running out of free requests, flush out everything
976 * in order to reduce memory useage...
978 if (inode
->u
.nfs_i
.npages
> MAX_REQUEST_SOFT
)
983 nfs_flush_incompatible(struct file
*file
, struct page
*page
)
985 struct inode
*inode
= file
->f_dentry
->d_inode
;
986 struct nfs_page
*req
;
989 * Look for a request corresponding to this page. If there
990 * is one, and it belongs to another file, we flush it out
991 * before we try to copy anything into the page. Do this
992 * due to the lack of an ACCESS-type call in NFSv2.
993 * Also do the same if we find a request from an existing
996 req
= nfs_find_request(inode
,page
);
998 if (req
->wb_file
!= file
|| req
->wb_page
!= page
)
999 status
= nfs_wb_page(inode
, page
);
1000 nfs_release_request(req
);
1002 return (status
< 0) ? status
: 0;
1006 * Update and possibly write a cached page of an NFS file.
1008 * XXX: Keep an eye on generic_file_read to make sure it doesn't do bad
1009 * things with a page scheduled for an RPC call (e.g. invalidate it).
1012 nfs_updatepage(struct file
*file
, struct page
*page
, unsigned int offset
, unsigned int count
)
1014 struct dentry
*dentry
= file
->f_dentry
;
1015 struct inode
*inode
= dentry
->d_inode
;
1016 struct nfs_page
*req
;
1017 int synchronous
= file
->f_flags
& O_SYNC
;
1020 dprintk("NFS: nfs_updatepage(%s/%s %d@%Ld)\n",
1021 dentry
->d_parent
->d_name
.name
, dentry
->d_name
.name
,
1022 count
, (long long)(page_offset(page
) +offset
));
1025 * If wsize is smaller than page size, update and write
1026 * page synchronously.
1028 if (NFS_SERVER(inode
)->wsize
< PAGE_SIZE
)
1029 return nfs_writepage_sync(file
, inode
, page
, offset
, count
);
1032 * Try to find an NFS request corresponding to this page
1034 * If the existing request cannot be updated, we must flush
1038 req
= nfs_update_request(file
, inode
, page
, offset
, count
);
1039 status
= (IS_ERR(req
)) ? PTR_ERR(req
) : 0;
1040 if (status
!= -EBUSY
)
1042 /* Request could not be updated. Flush it out and try again */
1043 status
= nfs_wb_page(inode
, page
);
1044 } while (status
>= 0);
1048 if (req
->wb_bytes
== PAGE_CACHE_SIZE
)
1049 SetPageUptodate(page
);
1055 error
= nfs_sync_file(inode
, file
, page_index(page
), 1, FLUSH_SYNC
|FLUSH_STABLE
);
1056 if (error
< 0 || (error
= file
->f_error
) < 0)
1060 /* If we wrote past the end of the page.
1061 * Call the strategy routine so it can send out a bunch
1064 if (req
->wb_offset
== 0 && req
->wb_bytes
== PAGE_CACHE_SIZE
)
1065 nfs_strategy(inode
);
1067 nfs_release_request(req
);
1069 dprintk("NFS: nfs_updatepage returns %d (isize %Ld)\n",
1070 status
, (long long)inode
->i_size
);
1072 ClearPageUptodate(page
);
1077 * Set up the argument/result storage required for the RPC call.
1080 nfs_write_rpcsetup(struct list_head
*head
, struct nfs_write_data
*data
)
1082 struct nfs_page
*req
;
1086 /* Set up the RPC argument and reply structs
1087 * NB: take care not to mess about with data->commit et al. */
1089 iov
= data
->args
.iov
;
1091 while (!list_empty(head
)) {
1092 struct nfs_page
*req
= nfs_list_entry(head
->next
);
1093 nfs_list_remove_request(req
);
1094 nfs_list_add_request(req
, &data
->pages
);
1095 iov
->iov_base
= kmap(req
->wb_page
) + req
->wb_offset
;
1096 iov
->iov_len
= req
->wb_bytes
;
1097 count
+= req
->wb_bytes
;
1101 req
= nfs_list_entry(data
->pages
.next
);
1102 data
->inode
= req
->wb_inode
;
1103 data
->cred
= req
->wb_cred
;
1104 data
->args
.fh
= NFS_FH(req
->wb_inode
);
1105 data
->args
.offset
= page_offset(req
->wb_page
) + req
->wb_offset
;
1106 data
->args
.count
= count
;
1107 data
->res
.fattr
= &data
->fattr
;
1108 data
->res
.count
= count
;
1109 data
->res
.verf
= &data
->verf
;
1114 * Create an RPC task for the given write request and kick it.
1115 * The page must have been locked by the caller.
1117 * It may happen that the page we're passed is not marked dirty.
1118 * This is the case if nfs_updatepage detects a conflicting request
1119 * that has been written but not committed.
1122 nfs_flush_one(struct list_head
*head
, struct inode
*inode
, int how
)
1124 struct rpc_clnt
*clnt
= NFS_CLIENT(inode
);
1125 struct nfs_write_data
*data
;
1126 struct rpc_task
*task
;
1127 struct rpc_message msg
;
1129 async
= !(how
& FLUSH_SYNC
),
1130 stable
= (how
& FLUSH_STABLE
);
1134 data
= nfs_writedata_alloc();
1139 /* Set the initial flags for the task. */
1140 flags
= (async
) ? RPC_TASK_ASYNC
: 0;
1142 /* Set up the argument struct */
1143 nfs_write_rpcsetup(head
, data
);
1145 if (!inode
->u
.nfs_i
.ncommit
)
1146 data
->args
.stable
= NFS_FILE_SYNC
;
1148 data
->args
.stable
= NFS_DATA_SYNC
;
1150 data
->args
.stable
= NFS_UNSTABLE
;
1152 /* Finalize the task. */
1153 rpc_init_task(task
, clnt
, nfs_writeback_done
, flags
);
1154 task
->tk_calldata
= data
;
1155 /* Release requests */
1156 task
->tk_release
= nfs_writedata_release
;
1158 #ifdef CONFIG_NFS_V3
1159 msg
.rpc_proc
= (NFS_PROTO(inode
)->version
== 3) ? NFS3PROC_WRITE
: NFSPROC_WRITE
;
1161 msg
.rpc_proc
= NFSPROC_WRITE
;
1163 msg
.rpc_argp
= &data
->args
;
1164 msg
.rpc_resp
= &data
->res
;
1165 msg
.rpc_cred
= data
->cred
;
1167 dprintk("NFS: %4d initiated write call (req %x/%Ld count %d nriov %d)\n",
1170 (long long)NFS_FILEID(inode
),
1171 data
->args
.count
, data
->args
.nriov
);
1173 rpc_clnt_sigmask(clnt
, &oldset
);
1174 rpc_call_setup(task
, &msg
, 0);
1176 rpc_clnt_sigunmask(clnt
, &oldset
);
1179 while (!list_empty(head
)) {
1180 struct nfs_page
*req
= nfs_list_entry(head
->next
);
1181 nfs_list_remove_request(req
);
1182 nfs_mark_request_dirty(req
);
1183 nfs_unlock_request(req
);
1189 nfs_flush_list(struct inode
*inode
, struct list_head
*head
, int how
)
1191 LIST_HEAD(one_request
);
1192 struct nfs_page
*req
;
1194 unsigned int pages
= 0,
1195 wpages
= NFS_SERVER(inode
)->wpages
;
1197 while (!list_empty(head
)) {
1198 pages
+= nfs_coalesce_requests(head
, &one_request
, wpages
);
1199 req
= nfs_list_entry(one_request
.next
);
1200 error
= nfs_flush_one(&one_request
, req
->wb_inode
, how
);
1207 while (!list_empty(head
)) {
1208 req
= nfs_list_entry(head
->next
);
1209 nfs_list_remove_request(req
);
1210 nfs_mark_request_dirty(req
);
1211 nfs_unlock_request(req
);
1218 * This function is called when the WRITE call is complete.
1221 nfs_writeback_done(struct rpc_task
*task
)
1223 struct nfs_write_data
*data
= (struct nfs_write_data
*) task
->tk_calldata
;
1224 struct nfs_writeargs
*argp
= &data
->args
;
1225 struct nfs_writeres
*resp
= &data
->res
;
1226 struct inode
*inode
= data
->inode
;
1227 struct nfs_page
*req
;
1230 dprintk("NFS: %4d nfs_writeback_done (status %d)\n",
1231 task
->tk_pid
, task
->tk_status
);
1233 /* We can't handle that yet but we check for it nevertheless */
1234 if (resp
->count
< argp
->count
&& task
->tk_status
>= 0) {
1235 static unsigned long complain
;
1236 if (time_before(complain
, jiffies
)) {
1238 "NFS: Server wrote less than requested.\n");
1239 complain
= jiffies
+ 300 * HZ
;
1241 /* Can't do anything about it right now except throw
1243 task
->tk_status
= -EIO
;
1245 #ifdef CONFIG_NFS_V3
1246 if (resp
->verf
->committed
< argp
->stable
&& task
->tk_status
>= 0) {
1247 /* We tried a write call, but the server did not
1248 * commit data to stable storage even though we
1250 * Note: There is a known bug in Tru64 < 5.0 in which
1251 * the server reports NFS_DATA_SYNC, but performs
1252 * NFS_FILE_SYNC. We therefore implement this checking
1253 * as a dprintk() in order to avoid filling syslog.
1255 static unsigned long complain
;
1257 if (time_before(complain
, jiffies
)) {
1258 dprintk("NFS: faulty NFSv3 server %s:"
1259 " (committed = %d) != (stable = %d)\n",
1260 NFS_SERVER(inode
)->hostname
,
1261 resp
->verf
->committed
, argp
->stable
);
1262 complain
= jiffies
+ 300 * HZ
;
1268 * Update attributes as result of writeback.
1269 * FIXME: There is an inherent race with invalidate_inode_pages and
1270 * writebacks since the page->count is kept > 1 for as long
1271 * as the page has a write request pending.
1273 nfs_write_attributes(inode
, resp
->fattr
);
1274 while (!list_empty(&data
->pages
)) {
1275 req
= nfs_list_entry(data
->pages
.next
);
1276 nfs_list_remove_request(req
);
1277 page
= req
->wb_page
;
1281 dprintk("NFS: write (%x/%Ld %d@%Ld)",
1282 req
->wb_inode
->i_dev
,
1283 (long long)NFS_FILEID(req
->wb_inode
),
1285 (long long)(page_offset(page
) + req
->wb_offset
));
1287 if (task
->tk_status
< 0) {
1288 ClearPageUptodate(page
);
1291 req
->wb_file
->f_error
= task
->tk_status
;
1292 nfs_inode_remove_request(req
);
1293 dprintk(", error = %d\n", task
->tk_status
);
1297 #ifdef CONFIG_NFS_V3
1298 if (resp
->verf
->committed
!= NFS_UNSTABLE
) {
1299 nfs_inode_remove_request(req
);
1303 memcpy(&req
->wb_verf
, resp
->verf
, sizeof(req
->wb_verf
));
1304 req
->wb_timeout
= jiffies
+ NFS_COMMIT_DELAY
;
1305 nfs_mark_request_commit(req
);
1306 dprintk(" marked for commit\n");
1308 nfs_inode_remove_request(req
);
1311 nfs_unlock_request(req
);
1316 #ifdef CONFIG_NFS_V3
1318 * Set up the argument/result storage required for the RPC call.
1321 nfs_commit_rpcsetup(struct list_head
*head
, struct nfs_write_data
*data
)
1323 struct nfs_page
*first
, *last
;
1324 struct inode
*inode
;
1325 loff_t start
, end
, len
;
1327 /* Set up the RPC argument and reply structs
1328 * NB: take care not to mess about with data->commit et al. */
1330 list_splice(head
, &data
->pages
);
1331 INIT_LIST_HEAD(head
);
1332 first
= nfs_list_entry(data
->pages
.next
);
1333 last
= nfs_list_entry(data
->pages
.prev
);
1334 inode
= first
->wb_inode
;
1337 * Determine the offset range of requests in the COMMIT call.
1338 * We rely on the fact that data->pages is an ordered list...
1340 start
= page_offset(first
->wb_page
) + first
->wb_offset
;
1341 end
= page_offset(last
->wb_page
) + (last
->wb_offset
+ last
->wb_bytes
);
1343 /* If 'len' is not a 32-bit quantity, pass '0' in the COMMIT call */
1344 if (end
>= inode
->i_size
|| len
< 0 || len
> (~((u32
)0) >> 1))
1347 data
->inode
= inode
;
1348 data
->cred
= first
->wb_cred
;
1349 data
->args
.fh
= NFS_FH(inode
);
1350 data
->args
.offset
= start
;
1351 data
->res
.count
= data
->args
.count
= (u32
)len
;
1352 data
->res
.fattr
= &data
->fattr
;
1353 data
->res
.verf
= &data
->verf
;
1357 * Commit dirty pages
1360 nfs_commit_list(struct list_head
*head
, int how
)
1362 struct rpc_message msg
;
1363 struct rpc_clnt
*clnt
;
1364 struct nfs_write_data
*data
;
1365 struct rpc_task
*task
;
1366 struct nfs_page
*req
;
1368 async
= !(how
& FLUSH_SYNC
);
1371 data
= nfs_writedata_alloc();
1377 flags
= (async
) ? RPC_TASK_ASYNC
: 0;
1379 /* Set up the argument struct */
1380 nfs_commit_rpcsetup(head
, data
);
1381 req
= nfs_list_entry(data
->pages
.next
);
1382 clnt
= NFS_CLIENT(req
->wb_inode
);
1384 rpc_init_task(task
, clnt
, nfs_commit_done
, flags
);
1385 task
->tk_calldata
= data
;
1386 /* Release requests */
1387 task
->tk_release
= nfs_writedata_release
;
1389 msg
.rpc_proc
= NFS3PROC_COMMIT
;
1390 msg
.rpc_argp
= &data
->args
;
1391 msg
.rpc_resp
= &data
->res
;
1392 msg
.rpc_cred
= data
->cred
;
1394 dprintk("NFS: %4d initiated commit call\n", task
->tk_pid
);
1395 rpc_clnt_sigmask(clnt
, &oldset
);
1396 rpc_call_setup(task
, &msg
, 0);
1398 rpc_clnt_sigunmask(clnt
, &oldset
);
1401 while (!list_empty(head
)) {
1402 req
= nfs_list_entry(head
->next
);
1403 nfs_list_remove_request(req
);
1404 nfs_mark_request_commit(req
);
1405 nfs_unlock_request(req
);
1411 * COMMIT call returned
1414 nfs_commit_done(struct rpc_task
*task
)
1416 struct nfs_write_data
*data
= (struct nfs_write_data
*)task
->tk_calldata
;
1417 struct nfs_writeres
*resp
= &data
->res
;
1418 struct nfs_page
*req
;
1419 struct inode
*inode
= data
->inode
;
1421 dprintk("NFS: %4d nfs_commit_done (status %d)\n",
1422 task
->tk_pid
, task
->tk_status
);
1424 nfs_write_attributes(inode
, resp
->fattr
);
1425 while (!list_empty(&data
->pages
)) {
1426 req
= nfs_list_entry(data
->pages
.next
);
1427 nfs_list_remove_request(req
);
1429 dprintk("NFS: commit (%x/%Ld %d@%Ld)",
1430 req
->wb_inode
->i_dev
,
1431 (long long)NFS_FILEID(req
->wb_inode
),
1433 (long long)(page_offset(req
->wb_page
) + req
->wb_offset
));
1434 if (task
->tk_status
< 0) {
1436 req
->wb_file
->f_error
= task
->tk_status
;
1437 nfs_inode_remove_request(req
);
1438 dprintk(", error = %d\n", task
->tk_status
);
1442 /* Okay, COMMIT succeeded, apparently. Check the verifier
1443 * returned by the server against all stored verfs. */
1444 if (!memcmp(req
->wb_verf
.verifier
, data
->verf
.verifier
, sizeof(data
->verf
.verifier
))) {
1445 /* We have a match */
1446 nfs_inode_remove_request(req
);
1450 /* We have a mismatch. Write the page again */
1451 dprintk(" mismatch\n");
1452 nfs_mark_request_dirty(req
);
1454 nfs_unlock_request(req
);
1459 int nfs_flush_file(struct inode
*inode
, struct file
*file
, unsigned long idx_start
,
1460 unsigned int npages
, int how
)
1466 res
= nfs_scan_dirty(inode
, &head
, file
, idx_start
, npages
);
1468 error
= nfs_flush_list(inode
, &head
, how
);
1474 int nfs_flush_timeout(struct inode
*inode
, int how
)
1480 pages
= nfs_scan_dirty_timeout(inode
, &head
);
1482 error
= nfs_flush_list(inode
, &head
, how
);
1488 #ifdef CONFIG_NFS_V3
1489 int nfs_commit_file(struct inode
*inode
, struct file
*file
, unsigned long idx_start
,
1490 unsigned int npages
, int how
)
1496 res
= nfs_scan_commit(inode
, &head
, file
, idx_start
, npages
);
1498 error
= nfs_commit_list(&head
, how
);
1504 int nfs_commit_timeout(struct inode
*inode
, int how
)
1510 pages
= nfs_scan_commit_timeout(inode
, &head
);
1512 pages
+= nfs_scan_commit(inode
, &head
, NULL
, 0, 0);
1513 error
= nfs_commit_list(&head
, how
);
1521 int nfs_sync_file(struct inode
*inode
, struct file
*file
, unsigned long idx_start
,
1522 unsigned int npages
, int how
)
1527 wait
= how
& FLUSH_WAIT
;
1531 inode
= file
->f_dentry
->d_inode
;
1536 error
= nfs_wait_on_requests(inode
, file
, idx_start
, npages
);
1538 error
= nfs_flush_file(inode
, file
, idx_start
, npages
, how
);
1539 #ifdef CONFIG_NFS_V3
1541 error
= nfs_commit_file(inode
, file
, idx_start
, npages
, how
);
1543 } while (error
> 0);
1547 int nfs_init_nfspagecache(void)
1549 nfs_page_cachep
= kmem_cache_create("nfs_page",
1550 sizeof(struct nfs_page
),
1551 0, SLAB_HWCACHE_ALIGN
,
1553 if (nfs_page_cachep
== NULL
)
1556 nfs_wdata_cachep
= kmem_cache_create("nfs_write_data",
1557 sizeof(struct nfs_write_data
),
1558 0, SLAB_HWCACHE_ALIGN
,
1560 if (nfs_wdata_cachep
== NULL
)
1566 void nfs_destroy_nfspagecache(void)
1568 if (kmem_cache_destroy(nfs_page_cachep
))
1569 printk(KERN_INFO
"nfs_page: not all structures were freed\n");
1570 if (kmem_cache_destroy(nfs_wdata_cachep
))
1571 printk(KERN_INFO
"nfs_write_data: not all structures were freed\n");