/* Storage object read/write
 *
 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public Licence
 * as published by the Free Software Foundation; either version
 * 2 of the Licence, or (at your option) any later version.
 */

#include <linux/mount.h>
#include <linux/file.h>
#include "internal.h"
/*
 * detect wake up events generated by the unlocking of pages in which we're
 * interested
 * - we use this to detect read completion of backing pages
 * - the caller holds the waitqueue lock
 */
static int cachefiles_read_waiter(wait_queue_t *wait, unsigned mode,
				  int sync, void *_key)
{
	struct cachefiles_one_read *monitor =
		container_of(wait, struct cachefiles_one_read, monitor);
	struct cachefiles_object *object;
	struct wait_bit_key *key = _key;
	struct page *page = wait->private;

	ASSERT(key);

	_enter("{%lu},%u,%d,{%p,%u}",
	       monitor->netfs_page->index, mode, sync,
	       key->flags, key->bit_nr);

	if (key->flags != &page->flags ||
	    key->bit_nr != PG_locked)
		return 0;

	_debug("--- monitor %p %lx ---", page, page->flags);

	if (!PageUptodate(page) && !PageError(page))
		dump_stack();

	/* remove from the waitqueue */
	list_del(&wait->task_list);

	/* move onto the action list and queue for FS-Cache thread pool */
	ASSERT(monitor->op);

	object = container_of(monitor->op->op.object,
			      struct cachefiles_object, fscache);

	spin_lock(&object->work_lock);
	list_add_tail(&monitor->op_link, &monitor->op->to_do);
	spin_unlock(&object->work_lock);

	fscache_enqueue_retrieval(monitor->op);
	return 0;
}
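
/*
 * Added commentary: the waiter above is reached through the page-bit
 * waitqueue when the backing page is unlocked, roughly:
 *
 *	unlock_page(backpage)
 *	  -> wake_up_page(backpage, PG_locked)
 *	    -> __wake_up_bit(...) with a wait_bit_key naming &page->flags
 *	      -> cachefiles_read_waiter(&monitor->monitor, ...)
 *
 * It runs with the waitqueue lock held, possibly in atomic context, which
 * is why it only unhooks the monitor and defers the actual data copy to
 * the FS-Cache thread pool via fscache_enqueue_retrieval().
 */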
/*
 * copy data from backing pages to netfs pages to complete a read operation
 * - driven by FS-Cache's thread pool
 */
static void cachefiles_read_copier(struct fscache_operation *_op)
{
	struct cachefiles_one_read *monitor;
	struct cachefiles_object *object;
	struct fscache_retrieval *op;
	struct pagevec pagevec;
	int error, max;

	op = container_of(_op, struct fscache_retrieval, op);
	object = container_of(op->op.object,
			      struct cachefiles_object, fscache);

	_enter("{ino=%lu}", object->backer->d_inode->i_ino);

	pagevec_init(&pagevec, 0);

	max = 8;
	spin_lock_irq(&object->work_lock);

	while (!list_empty(&op->to_do)) {
		monitor = list_entry(op->to_do.next,
				     struct cachefiles_one_read, op_link);
		list_del(&monitor->op_link);

		spin_unlock_irq(&object->work_lock);

		_debug("- copy {%lu}", monitor->back_page->index);

		error = -EIO;
		if (PageUptodate(monitor->back_page)) {
			copy_highpage(monitor->netfs_page, monitor->back_page);

			pagevec_add(&pagevec, monitor->netfs_page);
			fscache_mark_pages_cached(monitor->op, &pagevec);
			error = 0;
		}

		if (error)
			cachefiles_io_error_obj(
				object,
				"Readpage failed on backing file %lx",
				(unsigned long) monitor->back_page->flags);

		page_cache_release(monitor->back_page);

		fscache_end_io(op, monitor->netfs_page, error);
		page_cache_release(monitor->netfs_page);
		fscache_put_retrieval(op);
		kfree(monitor);

		/* let the thread pool have some air occasionally */
		max--;
		if (max < 0 || need_resched()) {
			if (!list_empty(&op->to_do))
				fscache_enqueue_retrieval(op);
			_leave(" [maxed out]");
			return;
		}

		spin_lock_irq(&object->work_lock);
	}

	spin_unlock_irq(&object->work_lock);
	_leave("");
}
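
/*
 * Added commentary: the copier deliberately processes only a small batch
 * (max starts at 8 above) before re-enqueuing itself if work remains or
 * need_resched() is set, so one long to_do list cannot monopolise an
 * FS-Cache worker thread.
 */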
/*
 * read the corresponding page to the given set from the backing file
 * - an uncertain page is simply discarded, to be tried again another time
 */
static int cachefiles_read_backing_file_one(struct cachefiles_object *object,
					    struct fscache_retrieval *op,
					    struct page *netpage,
					    struct pagevec *pagevec)
{
	struct cachefiles_one_read *monitor;
	struct address_space *bmapping;
	struct page *newpage, *backpage;
	int ret;

	_enter("");

	pagevec_reinit(pagevec);

	_debug("read back %p{%lu,%d}",
	       netpage, netpage->index, page_count(netpage));

	monitor = kzalloc(sizeof(*monitor), GFP_KERNEL);
	if (!monitor)
		goto nomem;

	monitor->netfs_page = netpage;
	monitor->op = fscache_get_retrieval(op);

	init_waitqueue_func_entry(&monitor->monitor, cachefiles_read_waiter);

	/* attempt to get hold of the backing page */
	bmapping = object->backer->d_inode->i_mapping;
	newpage = NULL;

	for (;;) {
		backpage = find_get_page(bmapping, netpage->index);
		if (backpage)
			goto backing_page_already_present;

		if (!newpage) {
			newpage = page_cache_alloc_cold(bmapping);
			if (!newpage)
				goto nomem_monitor;
		}

		ret = add_to_page_cache(newpage, bmapping,
					netpage->index, GFP_KERNEL);
		if (ret == 0)
			goto installed_new_backing_page;
		if (ret != -EEXIST)
			goto nomem_page;
	}

	/* we've installed a new backing page, so now we need to add it
	 * to the LRU list and start it reading */
installed_new_backing_page:
	_debug("- new %p", newpage);

	backpage = newpage;
	newpage = NULL;

	page_cache_get(backpage);
	pagevec_add(pagevec, backpage);
	__pagevec_lru_add_file(pagevec);

read_backing_page:
	ret = bmapping->a_ops->readpage(NULL, backpage);
	if (ret < 0)
		goto read_error;

	/* set the monitor to transfer the data across */
monitor_backing_page:
	_debug("- monitor add");

	/* install the monitor */
	page_cache_get(monitor->netfs_page);
	page_cache_get(backpage);
	monitor->back_page = backpage;
	monitor->monitor.private = backpage;
	add_page_wait_queue(backpage, &monitor->monitor);
	monitor = NULL;

	/* but the page may have been read before the monitor was installed, so
	 * the monitor may miss the event - so we have to ensure that we do get
	 * one in such a case */
	if (trylock_page(backpage)) {
		_debug("jumpstart %p {%lx}", backpage, backpage->flags);
		unlock_page(backpage);
	}
	goto success;

	/* if the backing page is already present, it can be in one of
	 * three states: read in progress, read failed or read okay */
backing_page_already_present:
	_debug("- present");

	if (newpage) {
		page_cache_release(newpage);
		newpage = NULL;
	}

	if (PageError(backpage))
		goto io_error;

	if (PageUptodate(backpage))
		goto backing_page_already_uptodate;

	if (!trylock_page(backpage))
		goto monitor_backing_page;
	_debug("read %p {%lx}", backpage, backpage->flags);
	goto read_backing_page;

	/* the backing page is already up to date, attach the netfs
	 * page to the pagecache and LRU and copy the data across */
backing_page_already_uptodate:
	_debug("- uptodate");

	pagevec_add(pagevec, netpage);
	fscache_mark_pages_cached(op, pagevec);

	copy_highpage(netpage, backpage);
	fscache_end_io(op, netpage, 0);

success:
	_debug("success");
	ret = 0;

out:
	if (backpage)
		page_cache_release(backpage);
	if (monitor) {
		fscache_put_retrieval(monitor->op);
		kfree(monitor);
	}
	_leave(" = %d", ret);
	return ret;

read_error:
	_debug("read error %d", ret);
	if (ret == -ENOMEM)
		goto out;
io_error:
	cachefiles_io_error_obj(object, "Page read error on backing file");
	ret = -ENOBUFS;
	goto out;

nomem_page:
	page_cache_release(newpage);
nomem_monitor:
	fscache_put_retrieval(monitor->op);
	kfree(monitor);
nomem:
	_leave(" = -ENOMEM");
	return -ENOMEM;
}
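
/*
 * Added commentary: the find_get_page()/add_to_page_cache() loop above is
 * the usual pagecache insertion race pattern.  If two threads race to
 * create the same backing page, only one add_to_page_cache() succeeds;
 * the loser gets -EEXIST and retries the lookup, picking up whichever
 * page was actually installed.
 */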
/*
 * read a page from the cache or allocate a block in which to store it
 * - cache withdrawal is prevented by the caller
 * - returns -EINTR if interrupted
 * - returns -ENOMEM if ran out of memory
 * - returns -ENOBUFS if no buffers can be made available
 * - returns -ENOBUFS if page is beyond EOF
 * - if the page is backed by a block in the cache:
 *   - a read will be started which will call the callback on completion
 *   - 0 will be returned
 * - else if the page is unbacked:
 *   - the metadata will be retained
 *   - -ENODATA will be returned
 */
int cachefiles_read_or_alloc_page(struct fscache_retrieval *op,
				  struct page *page,
				  gfp_t gfp)
{
	struct cachefiles_object *object;
	struct cachefiles_cache *cache;
	struct pagevec pagevec;
	struct inode *inode;
	sector_t block0, block;
	unsigned shift;
	int ret;

	object = container_of(op->op.object,
			      struct cachefiles_object, fscache);
	cache = container_of(object->fscache.cache,
			     struct cachefiles_cache, cache);

	_enter("{%p},{%lx},,,", object, page->index);

	if (!object->backer)
		return -ENOBUFS;

	inode = object->backer->d_inode;
	ASSERT(S_ISREG(inode->i_mode));
	ASSERT(inode->i_mapping->a_ops->bmap);
	ASSERT(inode->i_mapping->a_ops->readpages);

	/* calculate the shift required to use bmap */
	if (inode->i_sb->s_blocksize > PAGE_SIZE)
		return -ENOBUFS;

	shift = PAGE_SHIFT - inode->i_sb->s_blocksize_bits;

	op->op.flags = FSCACHE_OP_FAST;
	op->op.processor = cachefiles_read_copier;

	pagevec_init(&pagevec, 0);

	/* we assume the absence or presence of the first block is a good
	 * enough indication for the page as a whole
	 * - TODO: don't use bmap() for this as it is _not_ actually good
	 *   enough for this as it doesn't indicate errors, but it's all we've
	 *   got for the moment
	 */
	block0 = page->index;
	block0 <<= shift;

	block = inode->i_mapping->a_ops->bmap(inode->i_mapping, block0);
	_debug("%llx -> %llx",
	       (unsigned long long) block0,
	       (unsigned long long) block);

	if (block) {
		/* submit the apparently valid page to the backing fs to be
		 * read from disk */
		ret = cachefiles_read_backing_file_one(object, op, page,
						       &pagevec);
	} else if (cachefiles_has_space(cache, 0, 1) == 0) {
		/* there's space in the cache we can use */
		pagevec_add(&pagevec, page);
		fscache_mark_pages_cached(op, &pagevec);
		ret = -ENODATA;
	} else {
		ret = -ENOBUFS;
	}

	_leave(" = %d", ret);
	return ret;
}
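
/*
 * Worked example (added, hypothetical numbers): with 4KB pages on a
 * filesystem using 1KB blocks, shift = PAGE_SHIFT - s_blocksize_bits =
 * 12 - 10 = 2, so page index 5 corresponds to filesystem block
 * 5 << 2 = 20.  bmap() then reports whether block 20 is mapped on disk,
 * which is taken as "the cache file has data for this page".
 */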
/*
 * read the corresponding pages to the given set from the backing file
 * - any uncertain pages are simply discarded, to be tried again another time
 */
static int cachefiles_read_backing_file(struct cachefiles_object *object,
					struct fscache_retrieval *op,
					struct list_head *list,
					struct pagevec *mark_pvec)
{
	struct cachefiles_one_read *monitor = NULL;
	struct address_space *bmapping = object->backer->d_inode->i_mapping;
	struct pagevec lru_pvec;
	struct page *newpage = NULL, *netpage, *_n, *backpage = NULL;
	int ret = 0;

	_enter("");

	pagevec_init(&lru_pvec, 0);

	list_for_each_entry_safe(netpage, _n, list, lru) {
		list_del(&netpage->lru);

		_debug("read back %p{%lu,%d}",
		       netpage, netpage->index, page_count(netpage));

		if (!monitor) {
			monitor = kzalloc(sizeof(*monitor), GFP_KERNEL);
			if (!monitor)
				goto nomem;

			monitor->op = fscache_get_retrieval(op);
			init_waitqueue_func_entry(&monitor->monitor,
						  cachefiles_read_waiter);
		}

		for (;;) {
			backpage = find_get_page(bmapping, netpage->index);
			if (backpage)
				goto backing_page_already_present;

			if (!newpage) {
				newpage = page_cache_alloc_cold(bmapping);
				if (!newpage)
					goto nomem;
			}

			ret = add_to_page_cache(newpage, bmapping,
						netpage->index, GFP_KERNEL);
			if (ret == 0)
				goto installed_new_backing_page;
			if (ret != -EEXIST)
				goto nomem;
		}

		/* we've installed a new backing page, so now we need to add it
		 * to the LRU list and start it reading */
	installed_new_backing_page:
		_debug("- new %p", newpage);

		backpage = newpage;
		newpage = NULL;

		page_cache_get(backpage);
		if (!pagevec_add(&lru_pvec, backpage))
			__pagevec_lru_add_file(&lru_pvec);

	reread_backing_page:
		ret = bmapping->a_ops->readpage(NULL, backpage);
		if (ret < 0)
			goto read_error;

		/* add the netfs page to the pagecache and LRU, and set the
		 * monitor to transfer the data across */
	monitor_backing_page:
		_debug("- monitor add");

		ret = add_to_page_cache(netpage, op->mapping, netpage->index,
					GFP_KERNEL);
		if (ret < 0) {
			if (ret == -EEXIST) {
				page_cache_release(netpage);
				continue;
			}
			goto nomem;
		}

		page_cache_get(netpage);
		if (!pagevec_add(&lru_pvec, netpage))
			__pagevec_lru_add_file(&lru_pvec);

		/* install a monitor */
		page_cache_get(netpage);
		monitor->netfs_page = netpage;

		page_cache_get(backpage);
		monitor->back_page = backpage;
		monitor->monitor.private = backpage;
		add_page_wait_queue(backpage, &monitor->monitor);
		monitor = NULL;

		/* but the page may have been read before the monitor was
		 * installed, so the monitor may miss the event - so we have to
		 * ensure that we do get one in such a case */
		if (trylock_page(backpage)) {
			_debug("2unlock %p {%lx}", backpage, backpage->flags);
			unlock_page(backpage);
		}

		page_cache_release(backpage);
		backpage = NULL;

		page_cache_release(netpage);
		netpage = NULL;
		continue;

		/* if the backing page is already present, it can be in one of
		 * three states: read in progress, read failed or read okay */
	backing_page_already_present:
		_debug("- present %p", backpage);

		if (PageError(backpage))
			goto io_error;

		if (PageUptodate(backpage))
			goto backing_page_already_uptodate;

		_debug("- not ready %p{%lx}", backpage, backpage->flags);

		if (!trylock_page(backpage))
			goto monitor_backing_page;

		if (PageError(backpage)) {
			_debug("error %lx", backpage->flags);
			unlock_page(backpage);
			goto io_error;
		}

		if (PageUptodate(backpage))
			goto backing_page_already_uptodate_unlock;

		/* we've locked a page that's neither up to date nor erroneous,
		 * so we need to attempt to read it again */
		goto reread_backing_page;

		/* the backing page is already up to date, attach the netfs
		 * page to the pagecache and LRU and copy the data across */
	backing_page_already_uptodate_unlock:
		_debug("uptodate %lx", backpage->flags);
		unlock_page(backpage);
	backing_page_already_uptodate:
		_debug("- uptodate");

		ret = add_to_page_cache(netpage, op->mapping, netpage->index,
					GFP_KERNEL);
		if (ret < 0) {
			if (ret == -EEXIST) {
				page_cache_release(netpage);
				continue;
			}
			goto nomem;
		}

		copy_highpage(netpage, backpage);

		page_cache_release(backpage);
		backpage = NULL;

		if (!pagevec_add(mark_pvec, netpage))
			fscache_mark_pages_cached(op, mark_pvec);

		page_cache_get(netpage);
		if (!pagevec_add(&lru_pvec, netpage))
			__pagevec_lru_add_file(&lru_pvec);

		fscache_end_io(op, netpage, 0);
		page_cache_release(netpage);
		netpage = NULL;
		continue;
	}

	netpage = NULL;

	_debug("out");

out:
	/* tidy up */
	pagevec_lru_add_file(&lru_pvec);

	if (newpage)
		page_cache_release(newpage);
	if (netpage)
		page_cache_release(netpage);
	if (backpage)
		page_cache_release(backpage);
	if (monitor) {
		fscache_put_retrieval(op);
		kfree(monitor);
	}

	list_for_each_entry_safe(netpage, _n, list, lru) {
		list_del(&netpage->lru);
		page_cache_release(netpage);
	}

	_leave(" = %d", ret);
	return ret;

nomem:
	_debug("nomem");
	ret = -ENOMEM;
	goto out;

read_error:
	_debug("read error %d", ret);
	if (ret == -ENOMEM)
		goto out;
io_error:
	cachefiles_io_error_obj(object, "Page read error on backing file");
	ret = -ENOBUFS;
	goto out;
}
/*
 * read a list of pages from the cache or allocate blocks in which to store
 * them
 */
int cachefiles_read_or_alloc_pages(struct fscache_retrieval *op,
				   struct list_head *pages,
				   unsigned *nr_pages,
				   gfp_t gfp)
{
	struct cachefiles_object *object;
	struct cachefiles_cache *cache;
	struct list_head backpages;
	struct pagevec pagevec;
	struct inode *inode;
	struct page *page, *_n;
	unsigned shift, nrbackpages;
	int ret, ret2, space;

	object = container_of(op->op.object,
			      struct cachefiles_object, fscache);
	cache = container_of(object->fscache.cache,
			     struct cachefiles_cache, cache);

	_enter("{OBJ%x,%d},,%d,,",
	       object->fscache.debug_id, atomic_read(&op->op.usage),
	       *nr_pages);

	if (!object->backer)
		return -ENOBUFS;

	space = 1;
	if (cachefiles_has_space(cache, 0, *nr_pages) < 0)
		space = 0;

	inode = object->backer->d_inode;
	ASSERT(S_ISREG(inode->i_mode));
	ASSERT(inode->i_mapping->a_ops->bmap);
	ASSERT(inode->i_mapping->a_ops->readpages);

	/* calculate the shift required to use bmap */
	if (inode->i_sb->s_blocksize > PAGE_SIZE)
		return -ENOBUFS;

	shift = PAGE_SHIFT - inode->i_sb->s_blocksize_bits;

	pagevec_init(&pagevec, 0);

	op->op.flags = FSCACHE_OP_FAST;
	op->op.processor = cachefiles_read_copier;

	INIT_LIST_HEAD(&backpages);
	nrbackpages = 0;

	ret = space ? -ENODATA : -ENOBUFS;
	list_for_each_entry_safe(page, _n, pages, lru) {
		sector_t block0, block;

		/* we assume the absence or presence of the first block is a
		 * good enough indication for the page as a whole
		 * - TODO: don't use bmap() for this as it is _not_ actually
		 *   good enough for this as it doesn't indicate errors, but
		 *   it's all we've got for the moment
		 */
		block0 = page->index;
		block0 <<= shift;

		block = inode->i_mapping->a_ops->bmap(inode->i_mapping,
						      block0);
		_debug("%llx -> %llx",
		       (unsigned long long) block0,
		       (unsigned long long) block);

		if (block) {
			/* we have data - add it to the list to give to the
			 * backing fs */
			list_move(&page->lru, &backpages);
			(*nr_pages)--;
			nrbackpages++;
		} else if (space && pagevec_add(&pagevec, page) == 0) {
			fscache_mark_pages_cached(op, &pagevec);
			ret = -ENODATA;
		}
	}

	if (pagevec_count(&pagevec) > 0)
		fscache_mark_pages_cached(op, &pagevec);

	if (list_empty(pages))
		ret = 0;

	/* submit the apparently valid pages to the backing fs to be read from
	 * disk */
	if (nrbackpages > 0) {
		ret2 = cachefiles_read_backing_file(object, op, &backpages,
						    &pagevec);
		if (ret2 == -ENOMEM || ret2 == -EINTR)
			ret = ret2;
	}

	if (pagevec_count(&pagevec) > 0)
		fscache_mark_pages_cached(op, &pagevec);

	_leave(" = %d [nr=%u%s]",
	       ret, *nr_pages, list_empty(pages) ? " empty" : "");
	return ret;
}
/*
 * allocate a block in the cache in which to store a page
 * - cache withdrawal is prevented by the caller
 * - returns -EINTR if interrupted
 * - returns -ENOMEM if ran out of memory
 * - returns -ENOBUFS if no buffers can be made available
 * - returns -ENOBUFS if page is beyond EOF
 * - otherwise:
 *   - the metadata will be retained
 *   - 0 will be returned
 */
int cachefiles_allocate_page(struct fscache_retrieval *op,
			     struct page *page,
			     gfp_t gfp)
{
	struct cachefiles_object *object;
	struct cachefiles_cache *cache;
	struct pagevec pagevec;
	int ret;

	object = container_of(op->op.object,
			      struct cachefiles_object, fscache);
	cache = container_of(object->fscache.cache,
			     struct cachefiles_cache, cache);

	_enter("%p,{%lx},", object, page->index);

	ret = cachefiles_has_space(cache, 0, 1);
	if (ret == 0) {
		pagevec_init(&pagevec, 0);
		pagevec_add(&pagevec, page);
		fscache_mark_pages_cached(op, &pagevec);
	} else {
		ret = -ENOBUFS;
	}

	_leave(" = %d", ret);
	return ret;
}
/*
 * allocate blocks in the cache in which to store a set of pages
 * - cache withdrawal is prevented by the caller
 * - returns -EINTR if interrupted
 * - returns -ENOMEM if ran out of memory
 * - returns -ENOBUFS if some buffers couldn't be made available
 * - returns -ENOBUFS if some pages are beyond EOF
 * - otherwise:
 *   - -ENODATA will be returned
 *   - metadata will be retained for any page marked
 */
int cachefiles_allocate_pages(struct fscache_retrieval *op,
			      struct list_head *pages,
			      unsigned *nr_pages,
			      gfp_t gfp)
{
	struct cachefiles_object *object;
	struct cachefiles_cache *cache;
	struct pagevec pagevec;
	struct page *page;
	int ret;

	object = container_of(op->op.object,
			      struct cachefiles_object, fscache);
	cache = container_of(object->fscache.cache,
			     struct cachefiles_cache, cache);

	_enter("%p,,,%d,", object, *nr_pages);

	ret = cachefiles_has_space(cache, 0, *nr_pages);
	if (ret == 0) {
		pagevec_init(&pagevec, 0);

		list_for_each_entry(page, pages, lru) {
			if (pagevec_add(&pagevec, page) == 0)
				fscache_mark_pages_cached(op, &pagevec);
		}

		if (pagevec_count(&pagevec) > 0)
			fscache_mark_pages_cached(op, &pagevec);
		ret = -ENODATA;
	} else {
		ret = -ENOBUFS;
	}

	_leave(" = %d", ret);
	return ret;
}
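
/*
 * Added commentary: a pagevec is a small fixed-size batch of page
 * pointers (PAGEVEC_SIZE entries), and pagevec_add() returns the number
 * of free slots left, so a return of 0 means "now full".  Both allocate
 * functions therefore flush to fscache_mark_pages_cached() whenever the
 * pagevec fills and once more at the end for any remainder.
 */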
/*
 * request a page be stored in the cache
 * - cache withdrawal is prevented by the caller
 * - this request may be ignored if there's no cache block available, in which
 *   case -ENOBUFS will be returned
 * - if the op is in progress, 0 will be returned
 */
int cachefiles_write_page(struct fscache_storage *op, struct page *page)
{
	struct cachefiles_object *object;
	struct cachefiles_cache *cache;
	mm_segment_t old_fs;
	struct file *file;
	loff_t pos;
	void *data;
	int ret;

	ASSERT(op != NULL);
	ASSERT(page != NULL);

	object = container_of(op->op.object,
			      struct cachefiles_object, fscache);

	_enter("%p,%p{%lx},,,", object, page, page->index);

	if (!object->backer) {
		_leave(" = -ENOBUFS");
		return -ENOBUFS;
	}

	ASSERT(S_ISREG(object->backer->d_inode->i_mode));

	cache = container_of(object->fscache.cache,
			     struct cachefiles_cache, cache);

	/* write the page to the backing filesystem and let it store it in its
	 * own time */
	dget(object->backer);
	mntget(cache->mnt);
	file = dentry_open(object->backer, cache->mnt, O_RDWR,
			   cache->cache_cred);
	if (IS_ERR(file)) {
		ret = PTR_ERR(file);
	} else {
		ret = -EIO;
		if (file->f_op->write) {
			pos = (loff_t) page->index << PAGE_SHIFT;
			data = kmap(page);
			old_fs = get_fs();
			set_fs(KERNEL_DS);
			ret = file->f_op->write(
				file, (const void __user *) data, PAGE_SIZE,
				&pos);
			set_fs(old_fs);
			kunmap(page);
			if (ret != PAGE_SIZE)
				ret = -EIO;
		}
		fput(file);
	}

	if (ret < 0) {
		if (ret == -EIO)
			cachefiles_io_error_obj(
				object, "Write page to backing file failed");
		ret = -ENOBUFS;
	}

	_leave(" = %d", ret);
	return ret;
}
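
/*
 * Added commentary: the set_fs(KERNEL_DS) bracket above lets a kernel
 * buffer (the kmap()'d page) be passed through ->write(), which expects
 * a userspace pointer; this was the standard idiom before helpers such
 * as kernel_write() were available.  A short write (ret != PAGE_SIZE) is
 * treated as an I/O error.
 */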
/*
 * detach a backing block from a page
 * - cache withdrawal is prevented by the caller
 */
void cachefiles_uncache_page(struct fscache_object *_object, struct page *page)
{
	struct cachefiles_object *object;
	struct cachefiles_cache *cache;

	object = container_of(_object, struct cachefiles_object, fscache);
	cache = container_of(object->fscache.cache,
			     struct cachefiles_cache, cache);

	_enter("%p,{%lu}", object, page->index);

	spin_unlock(&object->fscache.cookie->lock);
}