6 * Partial copy of Linus' read cache modifications to fs/nfs/file.c
7 * modified for async RPC by okir@monad.swb.de
9 * We do an ugly hack here in order to return proper error codes to the
10 * user program when a read request failed: since generic_file_read
11 * only checks the return value of inode->i_op->readpage() which is always 0
12 * for async RPC, we set the error bit of the page to 1 when an error occurs,
13 * and make nfs_readpage transmit requests synchronously when encountering this.
14 * This is only a small problem, though, since we now retry all operations
15 * within the RPC code when root squashing is suspected.
18 #include <linux/config.h>
19 #include <linux/sched.h>
20 #include <linux/kernel.h>
21 #include <linux/errno.h>
22 #include <linux/fcntl.h>
23 #include <linux/stat.h>
25 #include <linux/malloc.h>
26 #include <linux/pagemap.h>
27 #include <linux/sunrpc/clnt.h>
28 #include <linux/nfs_fs.h>
29 #include <linux/nfs_page.h>
30 #include <linux/nfs_flushd.h>
31 #include <linux/smp_lock.h>
33 #include <asm/system.h>
35 #define NFSDBG_FACILITY NFSDBG_PAGECACHE
/*
 * Per-call bookkeeping for one asynchronous NFS read RPC: the
 * credentials the call is issued under, the XDR argument/result
 * structs, attribute storage for the reply, and the list of
 * coalesced page requests this call covers.
 *
 * NOTE(review): extraction-damaged fragment — original lines 38-39 and
 * the closing brace are missing.  Later code reads data->inode and
 * initializes an rpc_task from this struct, so the dropped lines
 * presumably declared an embedded task and an inode pointer; verify
 * against the pristine source before compiling.
 */
37 struct nfs_read_data
{
40 struct rpc_cred
*cred
;
41 struct nfs_readargs args
; /* XDR argument struct */
42 struct nfs_readres res
; /* ... and result struct */
43 struct nfs_fattr fattr
; /* fattr storage */
44 struct list_head pages
; /* Coalesced read requests */
48 * Local function declarations
50 static void nfs_readpage_result(struct rpc_task
*task
);
52 /* Hack for future NFS swap support */
54 # define IS_SWAPFILE(inode) (0)
57 static kmem_cache_t
*nfs_rdata_cachep
;
59 static __inline__
struct nfs_read_data
*nfs_readdata_alloc(void)
61 struct nfs_read_data
*p
;
62 p
= kmem_cache_alloc(nfs_rdata_cachep
, SLAB_NFS
);
64 memset(p
, 0, sizeof(*p
));
65 INIT_LIST_HEAD(&p
->pages
);
70 static __inline__
void nfs_readdata_free(struct nfs_read_data
*p
)
72 kmem_cache_free(nfs_rdata_cachep
, p
);
75 static void nfs_readdata_release(struct rpc_task
*task
)
77 struct nfs_read_data
*data
= (struct nfs_read_data
*)task
->tk_calldata
;
78 nfs_readdata_free(data
);
/*
 * Read one page from the server synchronously via the per-protocol
 * ->read() op, refreshing inode attributes from the returned fattr and
 * zero-filling / marking the page up to date on success.
 *
 * NOTE(review): extraction-damaged fragment.  The original numbering
 * shows gaps (86, 91, 93, 96-97, 99-108, 113-114, 117, 119-124,
 * 126-131, 133-135, 139, 141-148): the declarations of `buffer`,
 * `result` and `eof`, the kmap of the page, the read retry loop, the
 * error path for result < 0, the kunmap/unlock and the return value
 * are all missing.  Do not compile as-is; restore from pristine source.
 */
82 * Read a page synchronously.
85 nfs_readpage_sync(struct file
*file
, struct inode
*inode
, struct page
*page
)
87 struct dentry
*dentry
= file
->f_dentry
;
88 struct rpc_cred
*cred
= nfs_file_cred(file
);
89 struct nfs_fattr fattr
;
90 loff_t offset
= page_offset(page
);
92 int rsize
= NFS_SERVER(inode
)->rsize
;
94 int count
= PAGE_CACHE_SIZE
;
95 int flags
= IS_SWAPFILE(inode
)? NFS_RPC_SWAPFLAGS
: 0;
98 dprintk("NFS: nfs_readpage_sync(%p)\n", page
);
101 * This works now because the socket layer never tries to DMA
102 * into this buffer directly.
109 dprintk("NFS: nfs_proc_read(%s, (%s/%s), %Ld, %d, %p)\n",
110 NFS_SERVER(inode
)->hostname
,
111 dentry
->d_parent
->d_name
.name
, dentry
->d_name
.name
,
112 (long long)offset
, rsize
, buffer
);
115 result
= NFS_PROTO(inode
)->read(inode
, cred
, &fattr
, flags
,
116 offset
, rsize
, buffer
, &eof
);
/* Attribute refresh happens even for partial reads. */
118 nfs_refresh_inode(inode
, &fattr
);
121 * Even if we had a partial success we can't mark the page
125 if (result
== -EISDIR
)
132 if (result
< rsize
) /* NFSv2ism */
/* Short read: zero the remainder of the page before marking it valid. */
136 memset(buffer
, 0, count
);
137 flush_dcache_page(page
);
138 SetPageUptodate(page
);
140 ClearPageError(page
);
149 static inline struct nfs_page
*
150 _nfs_find_read(struct inode
*inode
, struct page
*page
)
152 struct list_head
*head
, *next
;
154 head
= &inode
->u
.nfs_i
.read
;
156 while (next
!= head
) {
157 struct nfs_page
*req
= nfs_list_entry(next
);
159 if (page_index(req
->wb_page
) != page_index(page
))
167 static struct nfs_page
*
168 nfs_find_read(struct inode
*inode
, struct page
*page
)
170 struct nfs_page
*req
;
171 spin_lock(&nfs_wreq_lock
);
172 req
= _nfs_find_read(inode
, page
);
173 spin_unlock(&nfs_wreq_lock
);
178 * Add a request to the inode's asynchronous read list.
181 nfs_mark_request_read(struct nfs_page
*req
)
183 struct inode
*inode
= req
->wb_inode
;
185 spin_lock(&nfs_wreq_lock
);
186 if (list_empty(&req
->wb_list
)) {
187 nfs_list_add_request(req
, &inode
->u
.nfs_i
.read
);
188 inode
->u
.nfs_i
.nread
++;
190 spin_unlock(&nfs_wreq_lock
);
192 * NB: the call to inode_schedule_scan() must lie outside the
193 * spinlock since it can run flushd().
195 inode_schedule_scan(inode
, req
->wb_timeout
);
/*
 * Queue an asynchronous read for @page: look for an existing request
 * covering the page, otherwise create one, mark it for delayed
 * submission, and flush the inode's read list early when enough
 * requests have accumulated or the page is the last one in the file.
 *
 * NOTE(review): extraction-damaged fragment — the retry-loop control
 * flow is missing (original lines 202-205, 207-208, 210, 214-215,
 * 217-220, 225-229, 231-234, 238, 240-241 are gone), including the
 * error variable, the loop around the lookup, the IS_ERR() check on
 * nfs_create_request(), and the return.  Restore from pristine source.
 */
199 nfs_readpage_async(struct file
*file
, struct inode
*inode
, struct page
*page
)
201 struct nfs_page
*req
, *new = NULL
;
/* Nothing to do if the page became valid while we were waiting. */
206 if (Page_Uptodate(page
))
209 req
= nfs_find_read(inode
, page
);
/* A pending request exists but for a different page object: flush it. */
211 if (page
!= req
->wb_page
) {
212 nfs_release_request(req
);
213 nfs_pagein_inode(inode
, page_index(page
), 0);
216 nfs_release_request(req
);
/* Queue the freshly created request with a submission deadline. */
221 nfs_lock_request(new);
222 new->wb_timeout
= jiffies
+ NFS_READ_DELAY
;
223 nfs_mark_request_read(new);
224 nfs_unlock_request(new);
230 new = nfs_create_request(file
, inode
, page
, 0, PAGE_CACHE_SIZE
);
/* Submit early once a server-sized batch is queued, or at EOF. */
235 if (inode
->u
.nfs_i
.nread
>= NFS_SERVER(inode
)->rpages
||
236 page_index(page
) == (inode
->i_size
+ PAGE_CACHE_SIZE
- 1) >> PAGE_CACHE_SHIFT
)
237 nfs_pagein_inode(inode
, 0, 0);
239 nfs_release_request(new);
/*
 * Populate an nfs_read_data for one RPC: move every request from @head
 * onto data->pages, build the iovec (one kmap'd segment per page
 * request), and fill in the XDR argument/result fields from the first
 * request's inode, credentials and offset.
 *
 * NOTE(review): extraction-damaged fragment — the declarations of
 * `iov`/`count`, the iov++/nriov++ loop tail and the res.eof reset
 * (original lines 248, 250-252, 254, 262-264, 273-274) are missing.
 */
244 * Set up the NFS read request struct
247 nfs_read_rpcsetup(struct list_head
*head
, struct nfs_read_data
*data
)
249 struct nfs_page
*req
;
253 iov
= data
->args
.iov
;
255 while (!list_empty(head
)) {
256 struct nfs_page
*req
= nfs_list_entry(head
->next
);
257 nfs_list_remove_request(req
);
258 nfs_list_add_request(req
, &data
->pages
);
/* Pages stay kmap'd until the reply is processed. */
259 iov
->iov_base
= kmap(req
->wb_page
) + req
->wb_offset
;
260 iov
->iov_len
= req
->wb_bytes
;
261 count
+= req
->wb_bytes
;
/* All coalesced requests share the first request's inode and cred. */
265 req
= nfs_list_entry(data
->pages
.next
);
266 data
->inode
= req
->wb_inode
;
267 data
->cred
= req
->wb_cred
;
268 data
->args
.fh
= NFS_FH(req
->wb_inode
);
269 data
->args
.offset
= page_offset(req
->wb_page
) + req
->wb_offset
;
270 data
->args
.count
= count
;
271 data
->res
.fattr
= &data
->fattr
;
272 data
->res
.count
= count
;
277 nfs_async_read_error(struct list_head
*head
)
279 struct nfs_page
*req
;
282 while (!list_empty(head
)) {
283 req
= nfs_list_entry(head
->next
);
285 nfs_list_remove_request(req
);
288 nfs_unlock_request(req
);
289 nfs_release_request(req
);
/*
 * Submit one coalesced batch of read requests as a single async RPC:
 * allocate the nfs_read_data, set up the XDR args, initialize the RPC
 * task with nfs_readpage_result as completion callback and
 * nfs_readdata_release as release hook, then fire the call with
 * signals masked.  On failure all requests are failed via
 * nfs_async_read_error().
 *
 * NOTE(review): extraction-damaged fragment — the declarations of
 * `flags`/`oldset`, the NULL check on nfs_readdata_alloc(), the
 * binding of `task` (presumably to an rpc_task embedded in data), the
 * rpc_execute/return path and the error label are missing (original
 * lines 300-302, 304-307, 310, 312, 318-319, 321, 323, 327, 330, 333,
 * 336, 338-339, 341-342).  The duplicate `msg.rpc_proc = NFSPROC_READ`
 * at original line 322 suggests a lost #ifdef CONFIG_NFS_V3 / #else
 * pair around lines 320-322.  Restore from pristine source.
 */
294 nfs_pagein_one(struct list_head
*head
, struct inode
*inode
)
296 struct rpc_task
*task
;
297 struct rpc_clnt
*clnt
= NFS_CLIENT(inode
);
298 struct nfs_read_data
*data
;
299 struct rpc_message msg
;
303 data
= nfs_readdata_alloc();
308 /* N.B. Do we need to test? Never called for swapfile inode */
309 flags
= RPC_TASK_ASYNC
| (IS_SWAPFILE(inode
)? NFS_RPC_SWAPFLAGS
: 0);
311 nfs_read_rpcsetup(head
, data
);
313 /* Finalize the task. */
314 rpc_init_task(task
, clnt
, nfs_readpage_result
, flags
);
315 task
->tk_calldata
= data
;
316 /* Release requests */
317 task
->tk_release
= nfs_readdata_release
;
320 msg
.rpc_proc
= (NFS_PROTO(inode
)->version
== 3) ? NFS3PROC_READ
: NFSPROC_READ
;
322 msg
.rpc_proc
= NFSPROC_READ
;
324 msg
.rpc_argp
= &data
->args
;
325 msg
.rpc_resp
= &data
->res
;
326 msg
.rpc_cred
= data
->cred
;
328 /* Start the async call */
329 dprintk("NFS: %4d initiated read call (req %x/%Ld count %d nriov %d.\n",
331 inode
->i_dev
, (long long)NFS_FILEID(inode
),
332 data
->args
.count
, data
->args
.nriov
);
/* Mask signals around the RPC submission so it cannot be interrupted. */
334 rpc_clnt_sigmask(clnt
, &oldset
);
335 rpc_call_setup(task
, &msg
, 0);
337 rpc_clnt_sigunmask(clnt
, &oldset
);
/* Error path: fail all queued requests. */
340 nfs_async_read_error(head
);
/*
 * Drain @head: repeatedly coalesce up to rpages contiguous requests
 * into one_request and hand each batch to nfs_pagein_one().  On a
 * submission error the remaining requests are failed via
 * nfs_async_read_error().
 *
 * NOTE(review): extraction-damaged fragment — the declaration of
 * `error`, the loop's error break, and the return value (original
 * lines 346, 349, 352, 357-362, 364-365) are missing.
 */
345 nfs_pagein_list(struct inode
*inode
, struct list_head
*head
)
347 LIST_HEAD(one_request
);
348 struct nfs_page
*req
;
350 unsigned int pages
= 0,
351 rpages
= NFS_SERVER(inode
)->rpages
;
353 while (!list_empty(head
)) {
354 pages
+= nfs_coalesce_requests(head
, &one_request
, rpages
);
355 req
= nfs_list_entry(one_request
.next
);
356 error
= nfs_pagein_one(&one_request
, req
->wb_inode
);
363 nfs_async_read_error(head
);
368 nfs_scan_read_timeout(struct inode
*inode
, struct list_head
*dst
)
371 spin_lock(&nfs_wreq_lock
);
372 pages
= nfs_scan_list_timeout(&inode
->u
.nfs_i
.read
, dst
, inode
);
373 inode
->u
.nfs_i
.nread
-= pages
;
374 if ((inode
->u
.nfs_i
.nread
== 0) != list_empty(&inode
->u
.nfs_i
.read
))
375 printk(KERN_ERR
"NFS: desynchronized value of nfs_i.nread.\n");
376 spin_unlock(&nfs_wreq_lock
);
381 nfs_scan_read(struct inode
*inode
, struct list_head
*dst
, unsigned long idx_start
, unsigned int npages
)
384 spin_lock(&nfs_wreq_lock
);
385 res
= nfs_scan_list(&inode
->u
.nfs_i
.read
, dst
, NULL
, idx_start
, npages
);
386 inode
->u
.nfs_i
.nread
-= res
;
387 if ((inode
->u
.nfs_i
.nread
== 0) != list_empty(&inode
->u
.nfs_i
.read
))
388 printk(KERN_ERR
"NFS: desynchronized value of nfs_i.nread.\n");
389 spin_unlock(&nfs_wreq_lock
);
393 int nfs_pagein_inode(struct inode
*inode
, unsigned long idx_start
,
400 res
= nfs_scan_read(inode
, &head
, idx_start
, npages
);
402 error
= nfs_pagein_list(inode
, &head
);
408 int nfs_pagein_timeout(struct inode
*inode
)
414 pages
= nfs_scan_read_timeout(inode
, &head
);
416 error
= nfs_pagein_list(inode
, &head
);
/*
 * RPC completion callback for an async read: refresh inode attributes
 * from the reply, then walk the coalesced requests, marking each page
 * up to date while the byte count lasts, and unlock/release every
 * request.
 *
 * NOTE(review): extraction-damaged fragment — original lines 441,
 * 445-446, 448-450, 454 and 458-459 are missing: presumably the error
 * branch (SetPageError on failure), the kunmap of each page, the
 * UnlockPage, and part of the dprintk argument list.  Restore from
 * pristine source before compiling.
 */
423 * This is the callback from RPC telling us whether a reply was
424 * received or some error occurred (timeout or socket shutdown).
427 nfs_readpage_result(struct rpc_task
*task
)
429 struct nfs_read_data
*data
= (struct nfs_read_data
*) task
->tk_calldata
;
430 struct inode
*inode
= data
->inode
;
431 int count
= data
->res
.count
;
433 dprintk("NFS: %4d nfs_readpage_result, (status %d)\n",
434 task
->tk_pid
, task
->tk_status
);
436 nfs_refresh_inode(inode
, &data
->fattr
);
437 while (!list_empty(&data
->pages
)) {
438 struct nfs_page
*req
= nfs_list_entry(data
->pages
.next
);
439 struct page
*page
= req
->wb_page
;
440 nfs_list_remove_request(req
);
/* Success: mark pages valid while the returned byte count covers them. */
442 if (task
->tk_status
>= 0 && count
>= 0) {
443 SetPageUptodate(page
);
444 count
-= PAGE_CACHE_SIZE
;
447 flush_dcache_page(page
);
451 dprintk("NFS: read (%x/%Ld %d@%Ld)\n",
452 req
->wb_inode
->i_dev
,
453 (long long)NFS_FILEID(req
->wb_inode
),
455 (long long)(page_offset(page
) + req
->wb_offset
));
456 nfs_unlock_request(req
);
457 nfs_release_request(req
);
/*
 * NOTE(review): extraction-damaged fragment — original lines 469-470,
 * 472-476, 478-479, 481, 483-485, 488, 494, 496-499, 502-504, 508-514
 * are missing: the return-type line, the `inode`/`error` declarations,
 * the error check after nfs_wb_page(), the fallback branch to the
 * synchronous path, and all returns.  The two adjacent, mutually
 * exclusive `inode = ...` assignments (original 480 vs 482) strongly
 * suggest a lost #if/#else on kernel version around the
 * page->mapping->host vs file->f_dentry->d_inode lookup.  Restore
 * from pristine source before compiling.
 */
462 * Read a page over NFS.
463 * We read the page synchronously in the following cases:
464 * - The NFS rsize is smaller than PAGE_CACHE_SIZE. We could kludge our way
465 * around this by creating several consecutive read requests, but
466 * that's hardly worth it.
467 * - The error flag is set for this page. This happens only when a
468 * previous async read operation failed.
471 nfs_readpage(struct file
*file
, struct page
*page
)
477 struct address_space
*mapping
= page
->mapping
;
480 inode
= (struct inode
*)mapping
->host
;
482 inode
= file
->f_dentry
->d_inode
;
486 dprintk("NFS: nfs_readpage (%p %ld@%lu)\n",
487 page
, PAGE_CACHE_SIZE
, page
->index
);
489 * Try to flush any pending writes to the file..
491 * NOTE! Because we own the page lock, there cannot
492 * be any new pending writes generated at this point
493 * for this page (other pages can be written to).
495 error
= nfs_wb_page(inode
, page
);
/* Async fast path only when no prior error and rsize covers a page. */
500 if (!PageError(page
) && NFS_SERVER(inode
)->rsize
>= PAGE_CACHE_SIZE
)
501 error
= nfs_readpage_async(file
, inode
, page
);
/* Fallback: synchronous read (small rsize or previous async failure). */
505 error
= nfs_readpage_sync(file
, inode
, page
);
506 if (error
< 0 && IS_SWAPFILE(inode
))
507 printk("Aiee.. nfs swap-in of page failed!\n");
516 int nfs_init_readpagecache(void)
518 nfs_rdata_cachep
= kmem_cache_create("nfs_read_data",
519 sizeof(struct nfs_read_data
),
520 0, SLAB_HWCACHE_ALIGN
,
522 if (nfs_rdata_cachep
== NULL
)
528 void nfs_destroy_readpagecache(void)
530 if (kmem_cache_destroy(nfs_rdata_cachep
))
531 printk(KERN_INFO
"nfs_read_data: not all structures were freed\n");