6 * Partial copy of Linus' read cache modifications to fs/nfs/file.c
7 * modified for async RPC by okir@monad.swb.de
9 * We do an ugly hack here in order to return proper error codes to the
10 * user program when a read request failed: since generic_file_read
11 * only checks the return value of inode->i_op->readpage() which is always 0
12 * for async RPC, we set the error bit of the page to 1 when an error occurs,
13 * and make nfs_readpage transmit requests synchronously when encountering this.
14 * This is only a small problem, though, since we now retry all operations
15 * within the RPC code when root squashing is suspected.
18 #include <linux/config.h>
19 #include <linux/time.h>
20 #include <linux/kernel.h>
21 #include <linux/errno.h>
22 #include <linux/fcntl.h>
23 #include <linux/stat.h>
25 #include <linux/slab.h>
26 #include <linux/pagemap.h>
27 #include <linux/mempool.h>
28 #include <linux/sunrpc/clnt.h>
29 #include <linux/nfs_fs.h>
30 #include <linux/nfs_page.h>
31 #include <linux/smp_lock.h>
33 #include <asm/system.h>
/*
 * File-scope declarations: the debug facility selector, forward
 * declarations for nfs_pagein_one() and the two read-completion handlers,
 * the slab cache and mempool that back struct nfs_read_data, and
 * MIN_POOL_READ, which is the first argument given to mempool_create() in
 * nfs_init_readpagecache().
 */
35 #define NFSDBG_FACILITY NFSDBG_PAGECACHE
37 static int nfs_pagein_one(struct list_head
*, struct inode
*);
38 static void nfs_readpage_result_partial(struct nfs_read_data
*, int);
39 static void nfs_readpage_result_full(struct nfs_read_data
*, int);
41 static kmem_cache_t
*nfs_rdata_cachep
;
42 static mempool_t
*nfs_rdata_mempool
;
44 #define MIN_POOL_READ (32)
/*
 * Allocate a struct nfs_read_data from nfs_rdata_mempool with SLAB_NOFS
 * and clear it with memset().
 *
 * NOTE(review): the embedded numbering skips lines 50 and 52, so any NULL
 * check on the allocation and the return statement are not visible in this
 * listing - confirm against the original file.
 */
46 static struct nfs_read_data
*nfs_readdata_alloc(void)
48 struct nfs_read_data
*p
;
49 p
= (struct nfs_read_data
*)mempool_alloc(nfs_rdata_mempool
, SLAB_NOFS
);
51 memset(p
, 0, sizeof(*p
));
55 static __inline__
void nfs_readdata_free(struct nfs_read_data
*p
)
57 mempool_free(p
, nfs_rdata_mempool
);
60 static void nfs_readdata_release(struct rpc_task
*task
)
62 struct nfs_read_data
*data
= (struct nfs_read_data
*)task
->tk_calldata
;
63 nfs_readdata_free(data
);
/*
 * Work out how many bytes of @page are covered by the file, using the size
 * returned by i_size_read(inode).
 *
 * idx is the index of the page that holds the last byte of the file,
 * (i_size - 1) >> PAGE_CACHE_SHIFT.  A page whose index is not idx (and
 * did not take the "index > idx" branch) yields PAGE_CACHE_SIZE; the page
 * at idx yields 1 + ((i_size - 1) & (PAGE_CACHE_SIZE - 1)).
 *
 * NOTE(review): the declaration of idx, whatever precedes embedded line 74
 * and the body of the "index > idx" branch are missing from this listing;
 * presumably they return 0 for an empty file or a page past EOF - confirm.
 */
67 unsigned int nfs_page_length(struct inode
*inode
, struct page
*page
)
69 loff_t i_size
= i_size_read(inode
);
74 idx
= (i_size
- 1) >> PAGE_CACHE_SHIFT
;
75 if (page
->index
> idx
)
77 if (page
->index
!= idx
)
78 return PAGE_CACHE_SIZE
;
79 return 1 + ((i_size
- 1) & (PAGE_CACHE_SIZE
- 1));
/*
 * Zero-fill the whole page and mark it up to date; no read is issued.
 *
 * NOTE(review): the listing stops after SetPageUptodate(); any remaining
 * statements and the return value are not visible - confirm against the
 * original file.
 */
83 int nfs_return_empty_page(struct page
*page
)
85 memclear_highpage_flush(page
, 0, PAGE_CACHE_SIZE
);
86 SetPageUptodate(page
);
92 * Read a page synchronously.
/*
 * Synchronous page read.  An nfs_read_data is allocated and filled in with
 * the credentials, inode, file handle, open context and page pointer, then
 * NFS_PROTO(inode)->read(rdata) is called and args.pgbase is advanced by
 * the result.  NFS_INO_INVALID_ATIME is set on the inode, the page is
 * cleared from args.pgbase for count bytes before SetPageUptodate(), and
 * the nfs_read_data is freed at the end.
 *
 * NOTE(review): the memset() on rdata repeats the clearing already done
 * inside nfs_readdata_alloc().
 * NOTE(review): the third parameter line, the loop structure, the error
 * paths and the return statement are among the lines missing from this
 * listing - confirm before changing any logic.
 */
94 static int nfs_readpage_sync(struct nfs_open_context
*ctx
, struct inode
*inode
,
97 unsigned int rsize
= NFS_SERVER(inode
)->rsize
;
98 unsigned int count
= PAGE_CACHE_SIZE
;
100 struct nfs_read_data
*rdata
;
102 rdata
= nfs_readdata_alloc();
106 memset(rdata
, 0, sizeof(*rdata
));
107 rdata
->flags
= (IS_SWAPFILE(inode
)? NFS_RPC_SWAPFLAGS
: 0);
108 rdata
->cred
= ctx
->cred
;
109 rdata
->inode
= inode
;
110 INIT_LIST_HEAD(&rdata
->pages
);
111 rdata
->args
.fh
= NFS_FH(inode
);
112 rdata
->args
.context
= ctx
;
113 rdata
->args
.pages
= &page
;
114 rdata
->args
.pgbase
= 0UL;
115 rdata
->args
.count
= rsize
;
116 rdata
->res
.fattr
= &rdata
->fattr
;
118 dprintk("NFS: nfs_readpage_sync(%p)\n", page
);
121 * This works now because the socket layer never tries to DMA
122 * into this buffer directly.
126 rdata
->args
.count
= count
;
127 rdata
->res
.count
= rdata
->args
.count
;
128 rdata
->args
.offset
= page_offset(page
) + rdata
->args
.pgbase
;
130 dprintk("NFS: nfs_proc_read(%s, (%s/%Ld), %Lu, %u)\n",
131 NFS_SERVER(inode
)->hostname
,
133 (long long)NFS_FILEID(inode
),
134 (unsigned long long)rdata
->args
.pgbase
,
138 result
= NFS_PROTO(inode
)->read(rdata
);
142 * Even if we had a partial success we can't mark the page
146 if (result
== -EISDIR
)
151 rdata
->args
.pgbase
+= result
;
152 /* Note: result == 0 should only happen if we're caching
153 * a write that extends the file and punches a hole.
155 if (rdata
->res
.eof
!= 0 || result
== 0)
158 NFS_FLAGS(inode
) |= NFS_INO_INVALID_ATIME
;
161 memclear_highpage_flush(page
, rdata
->args
.pgbase
, count
);
162 SetPageUptodate(page
);
164 ClearPageError(page
);
169 nfs_readdata_free(rdata
);
/*
 * Asynchronous page read.  nfs_page_length() gives the number of bytes of
 * the page backed by the file; an nfs_page request is created for bytes
 * starting at 0 with that length, the rest of the page is zeroed when the
 * length is below PAGE_CACHE_SIZE, and the locked request is handed to
 * nfs_pagein_one() on a single-entry list.  nfs_return_empty_page() is
 * returned on the early-exit path.
 *
 * NOTE(review): the third parameter line, the condition guarding the early
 * return, the handling of a failed nfs_create_request() and the final
 * return are on lines missing from this listing.
 */
173 static int nfs_readpage_async(struct nfs_open_context
*ctx
, struct inode
*inode
,
176 LIST_HEAD(one_request
);
177 struct nfs_page
*new;
180 len
= nfs_page_length(inode
, page
);
182 return nfs_return_empty_page(page
);
183 new = nfs_create_request(ctx
, inode
, page
, 0, len
);
188 if (len
< PAGE_CACHE_SIZE
)
189 memclear_highpage_flush(page
, len
, PAGE_CACHE_SIZE
- len
);
191 nfs_lock_request(new);
192 nfs_list_add_request(new, &one_request
);
193 nfs_pagein_one(&one_request
, inode
);
197 static void nfs_readpage_release(struct nfs_page
*req
)
199 unlock_page(req
->wb_page
);
201 nfs_clear_request(req
);
202 nfs_release_request(req
);
203 nfs_unlock_request(req
);
205 dprintk("NFS: read done (%s/%Ld %d@%Ld)\n",
206 req
->wb_context
->dentry
->d_inode
->i_sb
->s_id
,
207 (long long)NFS_FILEID(req
->wb_context
->dentry
->d_inode
),
209 (long long)req_offset(req
));
213 * Set up the NFS read request struct
/*
 * Fill in @data for a read of @count bytes starting @offset bytes into
 * @req.  The inode and credentials come from req->wb_context; args.offset
 * is req_offset(req) + @offset and args.pgbase is req->wb_pgbase + @offset;
 * args.pages points at data->pagevec; res.count starts out equal to
 * @count.  The protocol read_setup() hook is then called, and the task is
 * given the inode as tk_cookie, @data as tk_calldata and
 * nfs_readdata_release() as tk_release.
 *
 * NOTE(review): local declarations and some of the dprintk arguments are
 * on lines missing from this listing.
 */
215 static void nfs_read_rpcsetup(struct nfs_page
*req
, struct nfs_read_data
*data
,
216 unsigned int count
, unsigned int offset
)
221 data
->inode
= inode
= req
->wb_context
->dentry
->d_inode
;
222 data
->cred
= req
->wb_context
->cred
;
224 data
->args
.fh
= NFS_FH(inode
);
225 data
->args
.offset
= req_offset(req
) + offset
;
226 data
->args
.pgbase
= req
->wb_pgbase
+ offset
;
227 data
->args
.pages
= data
->pagevec
;
228 data
->args
.count
= count
;
229 data
->args
.context
= req
->wb_context
;
231 data
->res
.fattr
= &data
->fattr
;
232 data
->res
.count
= count
;
235 NFS_PROTO(inode
)->read_setup(data
);
237 data
->task
.tk_cookie
= (unsigned long)inode
;
238 data
->task
.tk_calldata
= data
;
239 /* Release requests */
240 data
->task
.tk_release
= nfs_readdata_release
;
242 dprintk("NFS: %4d initiated read call (req %s/%Ld, %u bytes @ offset %Lu)\n",
245 (long long)NFS_FILEID(inode
),
247 (unsigned long long)data
->args
.offset
);
/*
 * Fail every request on @head: each one is unlinked from the list, its
 * page is flagged with SetPageError() and the request is passed to
 * nfs_readpage_release().
 *
 * NOTE(review): the line carrying the storage class and return type is
 * missing from this listing.
 */
251 nfs_async_read_error(struct list_head
*head
)
253 struct nfs_page
*req
;
255 while (!list_empty(head
)) {
256 req
= nfs_list_entry(head
->next
);
257 nfs_list_remove_request(req
);
258 SetPageError(req
->wb_page
);
259 nfs_readpage_release(req
);
264 * Start an async read operation
/*
 * Run the RPC task embedded in @data with rpc_execute(), bracketed by
 * rpc_clnt_sigmask() and rpc_clnt_sigunmask() on the RPC client of
 * data->inode.
 *
 * NOTE(review): the declaration of oldset and the lines immediately around
 * rpc_execute() are missing from this listing.
 */
266 static void nfs_execute_read(struct nfs_read_data
*data
)
268 struct rpc_clnt
*clnt
= NFS_CLIENT(data
->inode
);
271 rpc_clnt_sigmask(clnt
, &oldset
);
273 rpc_execute(&data
->task
);
275 rpc_clnt_sigunmask(clnt
, &oldset
);
279 * Generate multiple requests to fill a single page.
281 * We optimize to reduce the number of read operations on the wire. If we
282 * detect that we're reading a page, or an area of a page, that is past the
283 * end of file, we do not generate NFS read operations but just clear the
284 * parts of the page that would have come back zero from the server anyway.
286 * We rely on the cached value of i_size to make this determination; another
287 * client can fill pages on the server past our cached end-of-file, but we
288 * won't see the new data until our attribute cache is updated. This is more
289 * or less conventional NFS client behavior.
/*
 * Fill a single page with several read RPCs.  The first request is taken
 * off @head, nfs_read_data entries are allocated onto a local list,
 * req->wb_complete is set to the value of requests, and for each entry a
 * read of rsize bytes (or of the remaining nbytes when that is not larger
 * than rsize) is set up with nfs_readpage_result_partial() as the
 * completion handler and started with nfs_execute_read().  The trailing
 * loop frees any nfs_read_data still on the local list and releases the
 * request.
 *
 * NOTE(review): the allocation loop, the declarations of list and
 * requests, the updates of nbytes and offset, and the path that leads to
 * the trailing cleanup are on lines missing from this listing; the summary
 * above is limited to the calls that are visible.
 */
291 static int nfs_pagein_multi(struct list_head
*head
, struct inode
*inode
)
293 struct nfs_page
*req
= nfs_list_entry(head
->next
);
294 struct page
*page
= req
->wb_page
;
295 struct nfs_read_data
*data
;
296 unsigned int rsize
= NFS_SERVER(inode
)->rsize
;
297 unsigned int nbytes
, offset
;
301 nfs_list_remove_request(req
);
303 nbytes
= req
->wb_bytes
;
305 data
= nfs_readdata_alloc();
308 INIT_LIST_HEAD(&data
->pages
);
309 list_add(&data
->pages
, &list
);
315 atomic_set(&req
->wb_complete
, requests
);
317 ClearPageError(page
);
319 nbytes
= req
->wb_bytes
;
321 data
= list_entry(list
.next
, struct nfs_read_data
, pages
);
322 list_del_init(&data
->pages
);
324 data
->pagevec
[0] = page
;
325 data
->complete
= nfs_readpage_result_partial
;
327 if (nbytes
> rsize
) {
328 nfs_read_rpcsetup(req
, data
, rsize
, offset
);
332 nfs_read_rpcsetup(req
, data
, nbytes
, offset
);
335 nfs_execute_read(data
);
336 } while (nbytes
!= 0);
341 while (!list_empty(&list
)) {
342 data
= list_entry(list
.next
, struct nfs_read_data
, pages
);
343 list_del(&data
->pages
);
344 nfs_readdata_free(data
);
347 nfs_readpage_release(req
);
/*
 * Read every request on @head with one RPC.  When the server rsize is
 * below PAGE_CACHE_SIZE the work is delegated to nfs_pagein_multi().
 * Otherwise each request is moved onto data->pages, its page error flag is
 * cleared, its page is stored in data->pagevec and its wb_bytes is added
 * to count; a single read for count bytes is then set up with
 * nfs_readpage_result_full() as the completion handler and started.
 * nfs_async_read_error(head) appears on the remaining path.
 *
 * NOTE(review): the declarations of pages and count, the check of the
 * allocation result and the return statements are on lines missing from
 * this listing.
 */
351 static int nfs_pagein_one(struct list_head
*head
, struct inode
*inode
)
353 struct nfs_page
*req
;
355 struct nfs_read_data
*data
;
358 if (NFS_SERVER(inode
)->rsize
< PAGE_CACHE_SIZE
)
359 return nfs_pagein_multi(head
, inode
);
361 data
= nfs_readdata_alloc();
365 INIT_LIST_HEAD(&data
->pages
);
366 pages
= data
->pagevec
;
368 while (!list_empty(head
)) {
369 req
= nfs_list_entry(head
->next
);
370 nfs_list_remove_request(req
);
371 nfs_list_add_request(req
, &data
->pages
);
372 ClearPageError(req
->wb_page
);
373 *pages
++ = req
->wb_page
;
374 count
+= req
->wb_bytes
;
376 req
= nfs_list_entry(data
->pages
.next
);
378 data
->complete
= nfs_readpage_result_full
;
379 nfs_read_rpcsetup(req
, data
, count
, 0);
381 nfs_execute_read(data
);
384 nfs_async_read_error(head
);
/*
 * Issue reads for the requests on @head.  While @head is not empty,
 * nfs_coalesce_requests() moves requests onto a temporary list (with
 * @rpages passed through to it) and that list is given to
 * nfs_pagein_one() using the inode of its first request.  The value
 * returned by nfs_coalesce_requests() is accumulated in pages.
 * nfs_async_read_error(head) is called on the remaining path.
 *
 * NOTE(review): the return type line, the declaration of error, the loop
 * exit on failure and the return statements are on lines missing from this
 * listing.
 */
389 nfs_pagein_list(struct list_head
*head
, int rpages
)
391 LIST_HEAD(one_request
);
392 struct nfs_page
*req
;
394 unsigned int pages
= 0;
396 while (!list_empty(head
)) {
397 pages
+= nfs_coalesce_requests(head
, &one_request
, rpages
);
398 req
= nfs_list_entry(one_request
.next
);
399 error
= nfs_pagein_one(&one_request
, req
->wb_context
->dentry
->d_inode
);
406 nfs_async_read_error(head
);
411 * Handle a read reply that fills part of a page.
/*
 * Completion handler for one of several reads that together fill a page.
 * When fewer bytes arrived (res.count) than were asked for (args.count),
 * the page is cleared starting at args.pgbase + result.  The handler that
 * brings req->wb_complete to zero marks the page up to date unless
 * PageError is set, and then releases the request.
 *
 * NOTE(review): the test of @status and the length argument of the clear
 * are on lines missing from this listing.
 */
413 static void nfs_readpage_result_partial(struct nfs_read_data
*data
, int status
)
415 struct nfs_page
*req
= data
->req
;
416 struct page
*page
= req
->wb_page
;
419 unsigned int request
= data
->args
.count
;
420 unsigned int result
= data
->res
.count
;
422 if (result
< request
) {
423 memclear_highpage_flush(page
,
424 data
->args
.pgbase
+ result
,
430 if (atomic_dec_and_test(&req
->wb_complete
)) {
431 if (!PageError(page
))
432 SetPageUptodate(page
);
433 nfs_readpage_release(req
);
438 * This is the callback from RPC telling us whether a reply was
439 * received or some error occurred (timeout or socket shutdown).
/*
 * Completion handler for a read that covers whole requests.  Walks
 * data->pages and unlinks each request.  count starts as res.count; when
 * it is below PAGE_CACHE_SIZE and also below req->wb_bytes, the page is
 * cleared from req->wb_pgbase + count for req->wb_bytes - count bytes.
 * Each request is released at the end of its iteration.
 *
 * NOTE(review): the branches that test @status are on lines missing from
 * this listing.  count is unsigned, so confirm that the
 * "count -= PAGE_CACHE_SIZE" statement is reached only when count is at
 * least PAGE_CACHE_SIZE.
 */
441 static void nfs_readpage_result_full(struct nfs_read_data
*data
, int status
)
443 unsigned int count
= data
->res
.count
;
445 while (!list_empty(&data
->pages
)) {
446 struct nfs_page
*req
= nfs_list_entry(data
->pages
.next
);
447 struct page
*page
= req
->wb_page
;
448 nfs_list_remove_request(req
);
451 if (count
< PAGE_CACHE_SIZE
) {
452 if (count
< req
->wb_bytes
)
453 memclear_highpage_flush(page
,
454 req
->wb_pgbase
+ count
,
455 req
->wb_bytes
- count
);
458 count
-= PAGE_CACHE_SIZE
;
459 SetPageUptodate(page
);
462 nfs_readpage_release(req
);
467 * This is the callback from RPC telling us whether a reply was
468 * received or some error occurred (timeout or socket shutdown).
/*
 * RPC completion callback for reads.  A reply that is short, not at eof
 * and non-empty is retried from the end of the received data: args.offset
 * and args.pgbase are advanced by res.count, args.count is reduced by it
 * and rpc_restart_call() is invoked.  Otherwise NFS_INO_INVALID_ATIME is
 * set on the inode and data->complete() is called.
 *
 * NOTE(review): status is sampled from task->tk_status at the top, before
 * the zero-progress case stores -EIO into task->tk_status, so
 * data->complete() is still handed the earlier value - confirm whether the
 * completion handlers are meant to see -EIO in that case.
 * NOTE(review): the statement that follows rpc_restart_call() is on a line
 * missing from this listing (presumably a return - confirm).
 */
470 void nfs_readpage_result(struct rpc_task
*task
)
472 struct nfs_read_data
*data
= (struct nfs_read_data
*)task
->tk_calldata
;
473 struct nfs_readargs
*argp
= &data
->args
;
474 struct nfs_readres
*resp
= &data
->res
;
475 int status
= task
->tk_status
;
477 dprintk("NFS: %4d nfs_readpage_result, (status %d)\n",
478 task
->tk_pid
, status
);
480 /* Is this a short read? */
481 if (task
->tk_status
>= 0 && resp
->count
< argp
->count
&& !resp
->eof
) {
482 /* Has the server at least made some progress? */
483 if (resp
->count
!= 0) {
484 /* Yes, so retry the read at the end of the data */
485 argp
->offset
+= resp
->count
;
486 argp
->pgbase
+= resp
->count
;
487 argp
->count
-= resp
->count
;
488 rpc_restart_call(task
);
491 task
->tk_status
= -EIO
;
493 NFS_FLAGS(data
->inode
) |= NFS_INO_INVALID_ATIME
;
494 data
->complete(data
, status
);
498 * Read a page over NFS.
499 * We read the page synchronously in the following case:
500 * - The error flag is set for this page. This happens only when a
501 * previous async read operation failed.
/*
 * Read one page over NFS.  Pending writes for the page are flushed with
 * nfs_wb_page(); an open context is obtained either from
 * nfs_find_open_context(inode, FMODE_READ) or from get_nfs_open_context();
 * the page is then read with nfs_readpage_async() unless IS_SYNC(inode),
 * in which case nfs_readpage_sync() is used and a failure on a swap file
 * is reported with printk().  The context reference is dropped with
 * put_nfs_open_context() at the end.
 *
 * NOTE(review): the condition choosing between the two context sources,
 * the argument of get_nfs_open_context(), the error handling and the
 * return statements are on lines missing from this listing.
 */
503 int nfs_readpage(struct file
*file
, struct page
*page
)
505 struct nfs_open_context
*ctx
;
506 struct inode
*inode
= page
->mapping
->host
;
509 dprintk("NFS: nfs_readpage (%p %ld@%lu)\n",
510 page
, PAGE_CACHE_SIZE
, page
->index
);
512 * Try to flush any pending writes to the file..
514 * NOTE! Because we own the page lock, there cannot
515 * be any new pending writes generated at this point
516 * for this page (other pages can be written to).
518 error
= nfs_wb_page(inode
, page
);
523 ctx
= nfs_find_open_context(inode
, FMODE_READ
);
527 ctx
= get_nfs_open_context((struct nfs_open_context
*)
529 if (!IS_SYNC(inode
)) {
530 error
= nfs_readpage_async(ctx
, inode
, page
);
534 error
= nfs_readpage_sync(ctx
, inode
, page
);
535 if (error
< 0 && IS_SWAPFILE(inode
))
536 printk("Aiee.. nfs swap-in of page failed!\n");
538 put_nfs_open_context(ctx
);
/*
 * State handed to readpage_async_filler(): head is the list that new
 * requests are appended to, ctx is the open context used when creating
 * them.
 */
546 struct nfs_readdesc
{
547 struct list_head
*head
;
548 struct nfs_open_context
*ctx
;
/*
 * Per-page callback that nfs_readpages() passes to read_cache_pages();
 * @data is the struct nfs_readdesc.  Pending writes on the page are
 * flushed with nfs_wb_page(), the in-file length is taken from
 * nfs_page_length(), an nfs_page is created for that length starting at
 * byte 0 using desc->ctx, the rest of the page is zeroed when the length
 * is below PAGE_CACHE_SIZE, and the locked request is appended to
 * desc->head.  nfs_return_empty_page() is returned on the early-exit path.
 *
 * NOTE(review): the return type line, the condition guarding the early
 * return, the handling of a failed nfs_create_request() and the final
 * return are on lines missing from this listing.
 */
552 readpage_async_filler(void *data
, struct page
*page
)
554 struct nfs_readdesc
*desc
= (struct nfs_readdesc
*)data
;
555 struct inode
*inode
= page
->mapping
->host
;
556 struct nfs_page
*new;
559 nfs_wb_page(inode
, page
);
560 len
= nfs_page_length(inode
, page
);
562 return nfs_return_empty_page(page
);
563 new = nfs_create_request(desc
->ctx
, inode
, page
, 0, len
);
569 if (len
< PAGE_CACHE_SIZE
)
570 memclear_highpage_flush(page
, len
, PAGE_CACHE_SIZE
- len
);
571 nfs_lock_request(new);
572 nfs_list_add_request(new, desc
->head
);
/*
 * Read a batch of pages.  An nfs_readdesc is given an open context (from
 * nfs_find_open_context(inode, FMODE_READ) or get_nfs_open_context()),
 * read_cache_pages() runs readpage_async_filler() over @pages, and when
 * the collected list is not empty it is submitted through
 * nfs_pagein_list() with server->rpages.  The context reference is dropped
 * with put_nfs_open_context() afterwards.
 *
 * NOTE(review): the declaration of head, the desc initialiser, the
 * handling of a NULL context and the return paths are on lines missing
 * from this listing.
 */
576 int nfs_readpages(struct file
*filp
, struct address_space
*mapping
,
577 struct list_head
*pages
, unsigned nr_pages
)
580 struct nfs_readdesc desc
= {
583 struct inode
*inode
= mapping
->host
;
584 struct nfs_server
*server
= NFS_SERVER(inode
);
587 dprintk("NFS: nfs_readpages (%s/%Ld %d)\n",
589 (long long)NFS_FILEID(inode
),
593 desc
.ctx
= nfs_find_open_context(inode
, FMODE_READ
);
594 if (desc
.ctx
== NULL
)
597 desc
.ctx
= get_nfs_open_context((struct nfs_open_context
*)
599 ret
= read_cache_pages(mapping
, pages
, readpage_async_filler
, &desc
);
600 if (!list_empty(&head
)) {
601 int err
= nfs_pagein_list(&head
, server
->rpages
);
605 put_nfs_open_context(desc
.ctx
);
/*
 * Create the "nfs_read_data" slab cache (objects of
 * sizeof(struct nfs_read_data), SLAB_HWCACHE_ALIGN) and a mempool sized by
 * MIN_POOL_READ.  Each result is compared against NULL.
 *
 * NOTE(review): the remaining arguments of both constructors and the
 * values returned on success and failure are on lines missing from this
 * listing.
 */
609 int nfs_init_readpagecache(void)
611 nfs_rdata_cachep
= kmem_cache_create("nfs_read_data",
612 sizeof(struct nfs_read_data
),
613 0, SLAB_HWCACHE_ALIGN
,
615 if (nfs_rdata_cachep
== NULL
)
618 nfs_rdata_mempool
= mempool_create(MIN_POOL_READ
,
622 if (nfs_rdata_mempool
== NULL
)
/*
 * Tear down the read-data allocator: destroy the mempool first, then the
 * slab cache, logging a KERN_INFO message when kmem_cache_destroy()
 * returns non-zero.
 *
 * NOTE(review): the listing ends inside this function, so anything after
 * the printk() is not visible.
 */
628 void nfs_destroy_readpagecache(void)
630 mempool_destroy(nfs_rdata_mempool
);
631 if (kmem_cache_destroy(nfs_rdata_cachep
))
632 printk(KERN_INFO
"nfs_read_data: not all structures were freed\n");