2 * Copyright (C) International Business Machines Corp., 2000-2005
3 * Portions Copyright (C) Christoph Hellwig, 2001-2002
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
13 * the GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 #include <linux/bio.h>
23 #include <linux/init.h>
24 #include <linux/buffer_head.h>
25 #include <linux/mempool.h>
26 #include "jfs_incore.h"
27 #include "jfs_superblock.h"
28 #include "jfs_filsys.h"
29 #include "jfs_metapage.h"
30 #include "jfs_txnmgr.h"
31 #include "jfs_debug.h"
33 #ifdef CONFIG_JFS_STATISTICS
35 uint pagealloc
; /* # of page allocations */
36 uint pagefree
; /* # of page frees */
37 uint lockwait
; /* # of sleeping lock_metapage() calls */
41 #define metapage_locked(mp) test_bit(META_locked, &(mp)->flag)
42 #define trylock_metapage(mp) test_and_set_bit(META_locked, &(mp)->flag)
44 static inline void unlock_metapage(struct metapage
*mp
)
46 clear_bit(META_locked
, &mp
->flag
);
50 static inline void __lock_metapage(struct metapage
*mp
)
52 DECLARE_WAITQUEUE(wait
, current
);
53 INCREMENT(mpStat
.lockwait
);
54 add_wait_queue_exclusive(&mp
->wait
, &wait
);
56 set_current_state(TASK_UNINTERRUPTIBLE
);
57 if (metapage_locked(mp
)) {
58 unlock_page(mp
->page
);
62 } while (trylock_metapage(mp
));
63 __set_current_state(TASK_RUNNING
);
64 remove_wait_queue(&mp
->wait
, &wait
);
68 * Must have mp->page locked
70 static inline void lock_metapage(struct metapage
*mp
)
72 if (trylock_metapage(mp
))
76 #define METAPOOL_MIN_PAGES 32
77 static kmem_cache_t
*metapage_cache
;
78 static mempool_t
*metapage_mempool
;
80 #define MPS_PER_PAGE (PAGE_CACHE_SIZE >> L2PSIZE)
87 struct metapage
*mp
[MPS_PER_PAGE
];
89 #define mp_anchor(page) ((struct meta_anchor *)page->private)
91 static inline struct metapage
*page_to_mp(struct page
*page
, uint offset
)
93 if (!PagePrivate(page
))
95 return mp_anchor(page
)->mp
[offset
>> L2PSIZE
];
98 static inline int insert_metapage(struct page
*page
, struct metapage
*mp
)
100 struct meta_anchor
*a
;
102 int l2mp_blocks
; /* log2 blocks per metapage */
104 if (PagePrivate(page
))
107 a
= kmalloc(sizeof(struct meta_anchor
), GFP_NOFS
);
110 memset(a
, 0, sizeof(struct meta_anchor
));
111 page
->private = (unsigned long)a
;
112 SetPagePrivate(page
);
117 l2mp_blocks
= L2PSIZE
- page
->mapping
->host
->i_blkbits
;
118 index
= (mp
->index
>> l2mp_blocks
) & (MPS_PER_PAGE
- 1);
126 static inline void remove_metapage(struct page
*page
, struct metapage
*mp
)
128 struct meta_anchor
*a
= mp_anchor(page
);
129 int l2mp_blocks
= L2PSIZE
- page
->mapping
->host
->i_blkbits
;
132 index
= (mp
->index
>> l2mp_blocks
) & (MPS_PER_PAGE
- 1);
134 BUG_ON(a
->mp
[index
] != mp
);
137 if (--a
->mp_count
== 0) {
140 ClearPagePrivate(page
);
145 static inline void inc_io(struct page
*page
)
147 atomic_inc(&mp_anchor(page
)->io_count
);
150 static inline void dec_io(struct page
*page
, void (*handler
) (struct page
*))
152 if (atomic_dec_and_test(&mp_anchor(page
)->io_count
))
157 static inline struct metapage
*page_to_mp(struct page
*page
, uint offset
)
159 return PagePrivate(page
) ? (struct metapage
*)page
->private : NULL
;
162 static inline int insert_metapage(struct page
*page
, struct metapage
*mp
)
165 page
->private = (unsigned long)mp
;
166 SetPagePrivate(page
);
172 static inline void remove_metapage(struct page
*page
, struct metapage
*mp
)
175 ClearPagePrivate(page
);
179 #define inc_io(page) do {} while(0)
180 #define dec_io(page, handler) handler(page)
184 static void init_once(void *foo
, kmem_cache_t
*cachep
, unsigned long flags
)
186 struct metapage
*mp
= (struct metapage
*)foo
;
188 if ((flags
& (SLAB_CTOR_VERIFY
|SLAB_CTOR_CONSTRUCTOR
)) ==
189 SLAB_CTOR_CONSTRUCTOR
) {
196 set_bit(META_free
, &mp
->flag
);
197 init_waitqueue_head(&mp
->wait
);
201 static inline struct metapage
*alloc_metapage(unsigned int gfp_mask
)
203 return mempool_alloc(metapage_mempool
, gfp_mask
);
206 static inline void free_metapage(struct metapage
*mp
)
209 set_bit(META_free
, &mp
->flag
);
211 mempool_free(mp
, metapage_mempool
);
214 int __init
metapage_init(void)
217 * Allocate the metapage structures
219 metapage_cache
= kmem_cache_create("jfs_mp", sizeof(struct metapage
),
220 0, 0, init_once
, NULL
);
221 if (metapage_cache
== NULL
)
224 metapage_mempool
= mempool_create(METAPOOL_MIN_PAGES
, mempool_alloc_slab
,
225 mempool_free_slab
, metapage_cache
);
227 if (metapage_mempool
== NULL
) {
228 kmem_cache_destroy(metapage_cache
);
235 void metapage_exit(void)
237 mempool_destroy(metapage_mempool
);
238 kmem_cache_destroy(metapage_cache
);
241 static inline void drop_metapage(struct page
*page
, struct metapage
*mp
)
243 if (mp
->count
|| mp
->nohomeok
|| test_bit(META_dirty
, &mp
->flag
) ||
244 test_bit(META_io
, &mp
->flag
))
246 remove_metapage(page
, mp
);
247 INCREMENT(mpStat
.pagefree
);
252 * Metapage address space operations
255 static sector_t
metapage_get_blocks(struct inode
*inode
, sector_t lblock
,
261 sector_t file_blocks
= (inode
->i_size
+ inode
->i_blksize
- 1) >>
264 if (lblock
>= file_blocks
)
266 if (lblock
+ *len
> file_blocks
)
267 *len
= file_blocks
- lblock
;
270 rc
= xtLookup(inode
, (s64
)lblock
, *len
, &xflag
, &xaddr
, len
, 0);
271 if ((rc
== 0) && *len
)
272 lblock
= (sector_t
)xaddr
;
275 } /* else no mapping */
280 static void last_read_complete(struct page
*page
)
282 if (!PageError(page
))
283 SetPageUptodate(page
);
287 static int metapage_read_end_io(struct bio
*bio
, unsigned int bytes_done
,
290 struct page
*page
= bio
->bi_private
;
295 if (!test_bit(BIO_UPTODATE
, &bio
->bi_flags
)) {
296 printk(KERN_ERR
"metapage_read_end_io: I/O error\n");
300 dec_io(page
, last_read_complete
);
306 static void remove_from_logsync(struct metapage
*mp
)
308 struct jfs_log
*log
= mp
->log
;
311 * This can race. Recheck that log hasn't been set to null, and after
312 * acquiring logsync lock, recheck lsn
317 LOGSYNC_LOCK(log
, flags
);
323 list_del(&mp
->synclist
);
325 LOGSYNC_UNLOCK(log
, flags
);
328 static void last_write_complete(struct page
*page
)
333 for (offset
= 0; offset
< PAGE_CACHE_SIZE
; offset
+= PSIZE
) {
334 mp
= page_to_mp(page
, offset
);
335 if (mp
&& test_bit(META_io
, &mp
->flag
)) {
337 remove_from_logsync(mp
);
338 clear_bit(META_io
, &mp
->flag
);
341 * I'd like to call drop_metapage here, but I don't think it's
342 * safe unless I have the page locked
345 end_page_writeback(page
);
348 static int metapage_write_end_io(struct bio
*bio
, unsigned int bytes_done
,
351 struct page
*page
= bio
->bi_private
;
353 BUG_ON(!PagePrivate(page
));
358 if (! test_bit(BIO_UPTODATE
, &bio
->bi_flags
)) {
359 printk(KERN_ERR
"metapage_write_end_io: I/O error\n");
362 dec_io(page
, last_write_complete
);
367 static int metapage_writepage(struct page
*page
, struct writeback_control
*wbc
)
369 struct bio
*bio
= NULL
;
370 unsigned int block_offset
; /* block offset of mp within page */
371 struct inode
*inode
= page
->mapping
->host
;
372 unsigned int blocks_per_mp
= JFS_SBI(inode
->i_sb
)->nbperpage
;
379 sector_t next_block
= 0;
381 unsigned long bio_bytes
= 0;
382 unsigned long bio_offset
= 0;
385 page_start
= (sector_t
)page
->index
<<
386 (PAGE_CACHE_SHIFT
- inode
->i_blkbits
);
387 BUG_ON(!PageLocked(page
));
388 BUG_ON(PageWriteback(page
));
390 for (offset
= 0; offset
< PAGE_CACHE_SIZE
; offset
+= PSIZE
) {
391 mp
= page_to_mp(page
, offset
);
393 if (!mp
|| !test_bit(META_dirty
, &mp
->flag
))
396 if (mp
->nohomeok
&& !test_bit(META_forcewrite
, &mp
->flag
)) {
401 clear_bit(META_dirty
, &mp
->flag
);
402 block_offset
= offset
>> inode
->i_blkbits
;
403 lblock
= page_start
+ block_offset
;
405 if (xlen
&& lblock
== next_block
) {
406 /* Contiguous, in memory & on disk */
407 len
= min(xlen
, blocks_per_mp
);
409 bio_bytes
+= len
<< inode
->i_blkbits
;
410 set_bit(META_io
, &mp
->flag
);
414 if (bio_add_page(bio
, page
, bio_bytes
, bio_offset
) <
418 * Increment counter before submitting i/o to keep
419 * count from hitting zero before we're through
424 submit_bio(WRITE
, bio
);
427 set_page_writeback(page
);
430 xlen
= (PAGE_CACHE_SIZE
- offset
) >> inode
->i_blkbits
;
431 pblock
= metapage_get_blocks(inode
, lblock
, &xlen
);
433 /* Need better error handling */
434 printk(KERN_ERR
"JFS: metapage_get_blocks failed\n");
435 dec_io(page
, last_write_complete
);
438 set_bit(META_io
, &mp
->flag
);
439 len
= min(xlen
, (uint
) JFS_SBI(inode
->i_sb
)->nbperpage
);
441 bio
= bio_alloc(GFP_NOFS
, 1);
442 bio
->bi_bdev
= inode
->i_sb
->s_bdev
;
443 bio
->bi_sector
= pblock
<< (inode
->i_blkbits
- 9);
444 bio
->bi_end_io
= metapage_write_end_io
;
445 bio
->bi_private
= page
;
447 /* Don't call bio_add_page yet, we may add to this vec */
449 bio_bytes
= len
<< inode
->i_blkbits
;
452 next_block
= lblock
+ len
;
455 if (bio_add_page(bio
, page
, bio_bytes
, bio_offset
) < bio_bytes
)
460 submit_bio(WRITE
, bio
);
463 redirty_page_for_writepage(wbc
, page
);
469 /* We should never reach here, since we're only adding one vec */
470 printk(KERN_ERR
"JFS: bio_add_page failed unexpectedly\n");
473 dump_mem("bio", bio
, sizeof(*bio
));
477 dec_io(page
, last_write_complete
);
482 static int metapage_readpage(struct file
*fp
, struct page
*page
)
484 struct inode
*inode
= page
->mapping
->host
;
485 struct bio
*bio
= NULL
;
486 unsigned int block_offset
;
487 unsigned int blocks_per_page
= PAGE_CACHE_SIZE
>> inode
->i_blkbits
;
488 sector_t page_start
; /* address of page in fs blocks */
494 BUG_ON(!PageLocked(page
));
495 page_start
= (sector_t
)page
->index
<<
496 (PAGE_CACHE_SHIFT
- inode
->i_blkbits
);
499 while (block_offset
< blocks_per_page
) {
500 xlen
= blocks_per_page
- block_offset
;
501 pblock
= metapage_get_blocks(inode
, page_start
+ block_offset
,
504 if (!PagePrivate(page
))
505 insert_metapage(page
, NULL
);
508 submit_bio(READ
, bio
);
510 bio
= bio_alloc(GFP_NOFS
, 1);
511 bio
->bi_bdev
= inode
->i_sb
->s_bdev
;
512 bio
->bi_sector
= pblock
<< (inode
->i_blkbits
- 9);
513 bio
->bi_end_io
= metapage_read_end_io
;
514 bio
->bi_private
= page
;
515 len
= xlen
<< inode
->i_blkbits
;
516 offset
= block_offset
<< inode
->i_blkbits
;
517 if (bio_add_page(bio
, page
, len
, offset
) < len
)
519 block_offset
+= xlen
;
524 submit_bio(READ
, bio
);
531 printk(KERN_ERR
"JFS: bio_add_page failed unexpectedly\n");
533 dec_io(page
, last_read_complete
);
537 static int metapage_releasepage(struct page
*page
, int gfp_mask
)
543 for (offset
= 0; offset
< PAGE_CACHE_SIZE
; offset
+= PSIZE
) {
544 mp
= page_to_mp(page
, offset
);
549 jfs_info("metapage_releasepage: mp = 0x%p", mp
);
550 if (mp
->count
|| mp
->nohomeok
) {
551 jfs_info("count = %ld, nohomeok = %d", mp
->count
,
556 wait_on_page_writeback(page
);
557 //WARN_ON(test_bit(META_dirty, &mp->flag));
558 if (test_bit(META_dirty
, &mp
->flag
)) {
559 dump_mem("dirty mp in metapage_releasepage", mp
,
560 sizeof(struct metapage
));
561 dump_mem("page", page
, sizeof(struct page
));
565 remove_from_logsync(mp
);
566 remove_metapage(page
, mp
);
567 INCREMENT(mpStat
.pagefree
);
576 static int metapage_invalidatepage(struct page
*page
, unsigned long offset
)
580 if (PageWriteback(page
))
583 return metapage_releasepage(page
, 0);
586 struct address_space_operations jfs_metapage_aops
= {
587 .readpage
= metapage_readpage
,
588 .writepage
= metapage_writepage
,
589 .sync_page
= block_sync_page
,
590 .releasepage
= metapage_releasepage
,
591 .invalidatepage
= metapage_invalidatepage
,
592 .set_page_dirty
= __set_page_dirty_nobuffers
,
595 struct metapage
*__get_metapage(struct inode
*inode
, unsigned long lblock
,
596 unsigned int size
, int absolute
,
601 struct address_space
*mapping
;
602 struct metapage
*mp
= NULL
;
604 unsigned long page_index
;
605 unsigned long page_offset
;
607 jfs_info("__get_metapage: ino = %ld, lblock = 0x%lx, abs=%d",
608 inode
->i_ino
, lblock
, absolute
);
610 l2bsize
= inode
->i_blkbits
;
611 l2BlocksPerPage
= PAGE_CACHE_SHIFT
- l2bsize
;
612 page_index
= lblock
>> l2BlocksPerPage
;
613 page_offset
= (lblock
- (page_index
<< l2BlocksPerPage
)) << l2bsize
;
614 if ((page_offset
+ size
) > PAGE_CACHE_SIZE
) {
615 jfs_err("MetaData crosses page boundary!!");
616 jfs_err("lblock = %lx, size = %d", lblock
, size
);
621 mapping
= JFS_SBI(inode
->i_sb
)->direct_inode
->i_mapping
;
624 * If an nfs client tries to read an inode that is larger
625 * than any existing inodes, we may try to read past the
626 * end of the inode map
628 if ((lblock
<< inode
->i_blkbits
) >= inode
->i_size
)
630 mapping
= inode
->i_mapping
;
633 if (new && (PSIZE
== PAGE_CACHE_SIZE
)) {
634 page
= grab_cache_page(mapping
, page_index
);
636 jfs_err("grab_cache_page failed!");
639 SetPageUptodate(page
);
641 page
= read_cache_page(mapping
, page_index
,
642 (filler_t
*)mapping
->a_ops
->readpage
, NULL
);
643 if (IS_ERR(page
) || !PageUptodate(page
)) {
644 jfs_err("read_cache_page failed!");
650 mp
= page_to_mp(page
, page_offset
);
652 if (mp
->logical_size
!= size
) {
653 jfs_error(inode
->i_sb
,
654 "__get_metapage: mp->logical_size != size");
655 jfs_err("logical_size = %d, size = %d",
656 mp
->logical_size
, size
);
662 if (test_bit(META_discard
, &mp
->flag
)) {
664 jfs_error(inode
->i_sb
,
665 "__get_metapage: using a "
666 "discarded metapage");
667 discard_metapage(mp
);
670 clear_bit(META_discard
, &mp
->flag
);
673 INCREMENT(mpStat
.pagealloc
);
674 mp
= alloc_metapage(GFP_NOFS
);
677 mp
->xflag
= COMMIT_PAGE
;
680 mp
->logical_size
= size
;
681 mp
->data
= page_address(page
) + page_offset
;
683 if (unlikely(insert_metapage(page
, mp
))) {
691 jfs_info("zeroing mp = 0x%p", mp
);
692 memset(mp
->data
, 0, PSIZE
);
696 jfs_info("__get_metapage: returning = 0x%p data = 0x%p", mp
, mp
->data
);
704 void grab_metapage(struct metapage
* mp
)
706 jfs_info("grab_metapage: mp = 0x%p", mp
);
707 page_cache_get(mp
->page
);
711 unlock_page(mp
->page
);
714 void force_metapage(struct metapage
*mp
)
716 struct page
*page
= mp
->page
;
717 jfs_info("force_metapage: mp = 0x%p", mp
);
718 set_bit(META_forcewrite
, &mp
->flag
);
719 clear_bit(META_sync
, &mp
->flag
);
720 page_cache_get(page
);
722 set_page_dirty(page
);
723 write_one_page(page
, 1);
724 clear_bit(META_forcewrite
, &mp
->flag
);
725 page_cache_release(page
);
728 void hold_metapage(struct metapage
*mp
)
733 void put_metapage(struct metapage
*mp
)
735 if (mp
->count
|| mp
->nohomeok
) {
736 /* Someone else will release this */
737 unlock_page(mp
->page
);
740 page_cache_get(mp
->page
);
743 unlock_page(mp
->page
);
744 release_metapage(mp
);
747 void release_metapage(struct metapage
* mp
)
749 struct page
*page
= mp
->page
;
750 jfs_info("release_metapage: mp = 0x%p, flag = 0x%lx", mp
, mp
->flag
);
758 if (--mp
->count
|| mp
->nohomeok
) {
760 page_cache_release(page
);
764 if (test_bit(META_dirty
, &mp
->flag
)) {
765 set_page_dirty(page
);
766 if (test_bit(META_sync
, &mp
->flag
)) {
767 clear_bit(META_sync
, &mp
->flag
);
768 write_one_page(page
, 1);
769 lock_page(page
); /* write_one_page unlocks the page */
771 } else if (mp
->lsn
) /* discard_metapage doesn't remove it */
772 remove_from_logsync(mp
);
774 #if MPS_PER_PAGE == 1
776 * If we know this is the only thing in the page, we can throw
777 * the page out of the page cache. If pages are larger, we
778 * don't want to do this.
781 /* Retest mp->count since we may have released page lock */
782 if (test_bit(META_discard
, &mp
->flag
) && !mp
->count
) {
783 clear_page_dirty(page
);
784 ClearPageUptodate(page
);
787 /* Try to keep metapages from using up too much memory */
788 drop_metapage(page
, mp
);
791 page_cache_release(page
);
794 void __invalidate_metapages(struct inode
*ip
, s64 addr
, int len
)
797 int l2BlocksPerPage
= PAGE_CACHE_SHIFT
- ip
->i_blkbits
;
798 int BlocksPerPage
= 1 << l2BlocksPerPage
;
799 /* All callers are interested in block device's mapping */
800 struct address_space
*mapping
=
801 JFS_SBI(ip
->i_sb
)->direct_inode
->i_mapping
;
807 * Mark metapages to discard. They will eventually be
808 * released, but should not be written.
810 for (lblock
= addr
& ~(BlocksPerPage
- 1); lblock
< addr
+ len
;
811 lblock
+= BlocksPerPage
) {
812 page
= find_lock_page(mapping
, lblock
>> l2BlocksPerPage
);
815 for (offset
= 0; offset
< PAGE_CACHE_SIZE
; offset
+= PSIZE
) {
816 mp
= page_to_mp(page
, offset
);
819 if (mp
->index
< addr
)
821 if (mp
->index
>= addr
+ len
)
824 clear_bit(META_dirty
, &mp
->flag
);
825 set_bit(META_discard
, &mp
->flag
);
827 remove_from_logsync(mp
);
830 page_cache_release(page
);
834 #ifdef CONFIG_JFS_STATISTICS
835 int jfs_mpstat_read(char *buffer
, char **start
, off_t offset
, int length
,
836 int *eof
, void *data
)
841 len
+= sprintf(buffer
,
842 "JFS Metapage statistics\n"
843 "=======================\n"
844 "page allocations = %d\n"
852 *start
= buffer
+ begin
;