/*
 * the_nilfs.c - the_nilfs shared structure.
 *
 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 *
 * Written by Ryusuke Konishi <ryusuke@osrg.net>
 *
 */
24 #include <linux/buffer_head.h>
25 #include <linux/slab.h>
26 #include <linux/blkdev.h>
27 #include <linux/backing-dev.h>
28 #include <linux/crc32.h>
38 void nilfs_set_last_segment(struct the_nilfs
*nilfs
,
39 sector_t start_blocknr
, u64 seq
, __u64 cno
)
41 spin_lock(&nilfs
->ns_last_segment_lock
);
42 nilfs
->ns_last_pseg
= start_blocknr
;
43 nilfs
->ns_last_seq
= seq
;
44 nilfs
->ns_last_cno
= cno
;
45 spin_unlock(&nilfs
->ns_last_segment_lock
);
49 * alloc_nilfs - allocate the_nilfs structure
50 * @bdev: block device to which the_nilfs is related
52 * alloc_nilfs() allocates memory for the_nilfs and
53 * initializes its reference count and locks.
55 * Return Value: On success, pointer to the_nilfs is returned.
56 * On error, NULL is returned.
58 struct the_nilfs
*alloc_nilfs(struct block_device
*bdev
)
60 struct the_nilfs
*nilfs
;
62 nilfs
= kzalloc(sizeof(*nilfs
), GFP_KERNEL
);
66 nilfs
->ns_bdev
= bdev
;
67 atomic_set(&nilfs
->ns_count
, 1);
68 atomic_set(&nilfs
->ns_writer_refcount
, -1);
69 atomic_set(&nilfs
->ns_ndirtyblks
, 0);
70 init_rwsem(&nilfs
->ns_sem
);
71 mutex_init(&nilfs
->ns_writer_mutex
);
72 INIT_LIST_HEAD(&nilfs
->ns_supers
);
73 spin_lock_init(&nilfs
->ns_last_segment_lock
);
74 nilfs
->ns_gc_inodes_h
= NULL
;
75 init_rwsem(&nilfs
->ns_segctor_sem
);
81 * put_nilfs - release a reference to the_nilfs
82 * @nilfs: the_nilfs structure to be released
84 * put_nilfs() decrements a reference counter of the_nilfs.
85 * If the reference count reaches zero, the_nilfs is freed.
87 void put_nilfs(struct the_nilfs
*nilfs
)
89 if (!atomic_dec_and_test(&nilfs
->ns_count
))
92 * Increment of ns_count never occur below because the caller
93 * of get_nilfs() holds at least one reference to the_nilfs.
94 * Thus its exclusion control is not required here.
97 if (nilfs_loaded(nilfs
)) {
98 nilfs_mdt_clear(nilfs
->ns_sufile
);
99 nilfs_mdt_destroy(nilfs
->ns_sufile
);
100 nilfs_mdt_clear(nilfs
->ns_cpfile
);
101 nilfs_mdt_destroy(nilfs
->ns_cpfile
);
102 nilfs_mdt_clear(nilfs
->ns_dat
);
103 nilfs_mdt_destroy(nilfs
->ns_dat
);
104 /* XXX: how and when to clear nilfs->ns_gc_dat? */
105 nilfs_mdt_destroy(nilfs
->ns_gc_dat
);
107 if (nilfs_init(nilfs
)) {
108 nilfs_destroy_gccache(nilfs
);
109 brelse(nilfs
->ns_sbh
[0]);
110 brelse(nilfs
->ns_sbh
[1]);
115 static int nilfs_load_super_root(struct the_nilfs
*nilfs
,
116 struct nilfs_sb_info
*sbi
, sector_t sr_block
)
118 static struct lock_class_key dat_lock_key
;
119 struct buffer_head
*bh_sr
;
120 struct nilfs_super_root
*raw_sr
;
121 struct nilfs_super_block
**sbp
= nilfs
->ns_sbp
;
122 unsigned dat_entry_size
, segment_usage_size
, checkpoint_size
;
126 err
= nilfs_read_super_root_block(sbi
->s_super
, sr_block
, &bh_sr
, 1);
130 down_read(&nilfs
->ns_sem
);
131 dat_entry_size
= le16_to_cpu(sbp
[0]->s_dat_entry_size
);
132 checkpoint_size
= le16_to_cpu(sbp
[0]->s_checkpoint_size
);
133 segment_usage_size
= le16_to_cpu(sbp
[0]->s_segment_usage_size
);
134 up_read(&nilfs
->ns_sem
);
136 inode_size
= nilfs
->ns_inode_size
;
139 nilfs
->ns_dat
= nilfs_mdt_new(
140 nilfs
, NULL
, NILFS_DAT_INO
, NILFS_DAT_GFP
);
141 if (unlikely(!nilfs
->ns_dat
))
144 nilfs
->ns_gc_dat
= nilfs_mdt_new(
145 nilfs
, NULL
, NILFS_DAT_INO
, NILFS_DAT_GFP
);
146 if (unlikely(!nilfs
->ns_gc_dat
))
149 nilfs
->ns_cpfile
= nilfs_mdt_new(
150 nilfs
, NULL
, NILFS_CPFILE_INO
, NILFS_CPFILE_GFP
);
151 if (unlikely(!nilfs
->ns_cpfile
))
154 nilfs
->ns_sufile
= nilfs_mdt_new(
155 nilfs
, NULL
, NILFS_SUFILE_INO
, NILFS_SUFILE_GFP
);
156 if (unlikely(!nilfs
->ns_sufile
))
159 err
= nilfs_palloc_init_blockgroup(nilfs
->ns_dat
, dat_entry_size
);
163 err
= nilfs_palloc_init_blockgroup(nilfs
->ns_gc_dat
, dat_entry_size
);
167 lockdep_set_class(&NILFS_MDT(nilfs
->ns_dat
)->mi_sem
, &dat_lock_key
);
168 lockdep_set_class(&NILFS_MDT(nilfs
->ns_gc_dat
)->mi_sem
, &dat_lock_key
);
170 nilfs_mdt_set_shadow(nilfs
->ns_dat
, nilfs
->ns_gc_dat
);
171 nilfs_mdt_set_entry_size(nilfs
->ns_cpfile
, checkpoint_size
,
172 sizeof(struct nilfs_cpfile_header
));
173 nilfs_mdt_set_entry_size(nilfs
->ns_sufile
, segment_usage_size
,
174 sizeof(struct nilfs_sufile_header
));
176 err
= nilfs_mdt_read_inode_direct(
177 nilfs
->ns_dat
, bh_sr
, NILFS_SR_DAT_OFFSET(inode_size
));
181 err
= nilfs_mdt_read_inode_direct(
182 nilfs
->ns_cpfile
, bh_sr
, NILFS_SR_CPFILE_OFFSET(inode_size
));
186 err
= nilfs_mdt_read_inode_direct(
187 nilfs
->ns_sufile
, bh_sr
, NILFS_SR_SUFILE_OFFSET(inode_size
));
191 raw_sr
= (struct nilfs_super_root
*)bh_sr
->b_data
;
192 nilfs
->ns_nongc_ctime
= le64_to_cpu(raw_sr
->sr_nongc_ctime
);
199 nilfs_mdt_destroy(nilfs
->ns_sufile
);
202 nilfs_mdt_destroy(nilfs
->ns_cpfile
);
205 nilfs_mdt_destroy(nilfs
->ns_gc_dat
);
208 nilfs_mdt_destroy(nilfs
->ns_dat
);
212 static void nilfs_init_recovery_info(struct nilfs_recovery_info
*ri
)
214 memset(ri
, 0, sizeof(*ri
));
215 INIT_LIST_HEAD(&ri
->ri_used_segments
);
218 static void nilfs_clear_recovery_info(struct nilfs_recovery_info
*ri
)
220 nilfs_dispose_segment_list(&ri
->ri_used_segments
);
224 * load_nilfs - load and recover the nilfs
225 * @nilfs: the_nilfs structure to be released
226 * @sbi: nilfs_sb_info used to recover past segment
228 * load_nilfs() searches and load the latest super root,
229 * attaches the last segment, and does recovery if needed.
230 * The caller must call this exclusively for simultaneous mounts.
232 int load_nilfs(struct the_nilfs
*nilfs
, struct nilfs_sb_info
*sbi
)
234 struct nilfs_recovery_info ri
;
235 unsigned int s_flags
= sbi
->s_super
->s_flags
;
236 int really_read_only
= bdev_read_only(nilfs
->ns_bdev
);
240 nilfs_init_recovery_info(&ri
);
242 down_write(&nilfs
->ns_sem
);
243 valid_fs
= (nilfs
->ns_mount_state
& NILFS_VALID_FS
);
244 up_write(&nilfs
->ns_sem
);
246 if (!valid_fs
&& (s_flags
& MS_RDONLY
)) {
247 printk(KERN_INFO
"NILFS: INFO: recovery "
248 "required for readonly filesystem.\n");
249 if (really_read_only
) {
250 printk(KERN_ERR
"NILFS: write access "
251 "unavailable, cannot proceed.\n");
255 printk(KERN_INFO
"NILFS: write access will "
256 "be enabled during recovery.\n");
257 sbi
->s_super
->s_flags
&= ~MS_RDONLY
;
260 err
= nilfs_search_super_root(nilfs
, sbi
, &ri
);
262 printk(KERN_ERR
"NILFS: error searching super root.\n");
266 err
= nilfs_load_super_root(nilfs
, sbi
, ri
.ri_super_root
);
268 printk(KERN_ERR
"NILFS: error loading super root.\n");
273 err
= nilfs_recover_logical_segments(nilfs
, sbi
, &ri
);
275 nilfs_mdt_destroy(nilfs
->ns_cpfile
);
276 nilfs_mdt_destroy(nilfs
->ns_sufile
);
277 nilfs_mdt_destroy(nilfs
->ns_dat
);
280 if (ri
.ri_need_recovery
== NILFS_RECOVERY_SR_UPDATED
)
281 sbi
->s_super
->s_dirt
= 1;
284 set_nilfs_loaded(nilfs
);
287 nilfs_clear_recovery_info(&ri
);
288 sbi
->s_super
->s_flags
= s_flags
;
292 static unsigned long long nilfs_max_size(unsigned int blkbits
)
294 unsigned int max_bits
;
295 unsigned long long res
= MAX_LFS_FILESIZE
; /* page cache limit */
297 max_bits
= blkbits
+ NILFS_BMAP_KEY_BIT
; /* bmap size limit */
299 res
= min_t(unsigned long long, res
, (1ULL << max_bits
) - 1);
303 static int nilfs_store_disk_layout(struct the_nilfs
*nilfs
,
304 struct nilfs_super_block
*sbp
)
306 if (le32_to_cpu(sbp
->s_rev_level
) != NILFS_CURRENT_REV
) {
307 printk(KERN_ERR
"NILFS: revision mismatch "
308 "(superblock rev.=%d.%d, current rev.=%d.%d). "
309 "Please check the version of mkfs.nilfs.\n",
310 le32_to_cpu(sbp
->s_rev_level
),
311 le16_to_cpu(sbp
->s_minor_rev_level
),
312 NILFS_CURRENT_REV
, NILFS_MINOR_REV
);
315 nilfs
->ns_sbsize
= le16_to_cpu(sbp
->s_bytes
);
316 if (nilfs
->ns_sbsize
> BLOCK_SIZE
)
319 nilfs
->ns_inode_size
= le16_to_cpu(sbp
->s_inode_size
);
320 nilfs
->ns_first_ino
= le32_to_cpu(sbp
->s_first_ino
);
322 nilfs
->ns_blocks_per_segment
= le32_to_cpu(sbp
->s_blocks_per_segment
);
323 if (nilfs
->ns_blocks_per_segment
< NILFS_SEG_MIN_BLOCKS
) {
324 printk(KERN_ERR
"NILFS: too short segment. \n");
328 nilfs
->ns_first_data_block
= le64_to_cpu(sbp
->s_first_data_block
);
329 nilfs
->ns_nsegments
= le64_to_cpu(sbp
->s_nsegments
);
330 nilfs
->ns_r_segments_percentage
=
331 le32_to_cpu(sbp
->s_r_segments_percentage
);
333 max_t(unsigned long, NILFS_MIN_NRSVSEGS
,
334 DIV_ROUND_UP(nilfs
->ns_nsegments
*
335 nilfs
->ns_r_segments_percentage
, 100));
336 nilfs
->ns_crc_seed
= le32_to_cpu(sbp
->s_crc_seed
);
340 static int nilfs_valid_sb(struct nilfs_super_block
*sbp
)
342 static unsigned char sum
[4];
343 const int sumoff
= offsetof(struct nilfs_super_block
, s_sum
);
347 if (!sbp
|| le16_to_cpu(sbp
->s_magic
) != NILFS_SUPER_MAGIC
)
349 bytes
= le16_to_cpu(sbp
->s_bytes
);
350 if (bytes
> BLOCK_SIZE
)
352 crc
= crc32_le(le32_to_cpu(sbp
->s_crc_seed
), (unsigned char *)sbp
,
354 crc
= crc32_le(crc
, sum
, 4);
355 crc
= crc32_le(crc
, (unsigned char *)sbp
+ sumoff
+ 4,
357 return crc
== le32_to_cpu(sbp
->s_sum
);
360 static int nilfs_sb2_bad_offset(struct nilfs_super_block
*sbp
, u64 offset
)
362 return offset
< ((le64_to_cpu(sbp
->s_nsegments
) *
363 le32_to_cpu(sbp
->s_blocks_per_segment
)) <<
364 (le32_to_cpu(sbp
->s_log_block_size
) + 10));
367 static void nilfs_release_super_block(struct the_nilfs
*nilfs
)
371 for (i
= 0; i
< 2; i
++) {
372 if (nilfs
->ns_sbp
[i
]) {
373 brelse(nilfs
->ns_sbh
[i
]);
374 nilfs
->ns_sbh
[i
] = NULL
;
375 nilfs
->ns_sbp
[i
] = NULL
;
380 void nilfs_fall_back_super_block(struct the_nilfs
*nilfs
)
382 brelse(nilfs
->ns_sbh
[0]);
383 nilfs
->ns_sbh
[0] = nilfs
->ns_sbh
[1];
384 nilfs
->ns_sbp
[0] = nilfs
->ns_sbp
[1];
385 nilfs
->ns_sbh
[1] = NULL
;
386 nilfs
->ns_sbp
[1] = NULL
;
389 void nilfs_swap_super_block(struct the_nilfs
*nilfs
)
391 struct buffer_head
*tsbh
= nilfs
->ns_sbh
[0];
392 struct nilfs_super_block
*tsbp
= nilfs
->ns_sbp
[0];
394 nilfs
->ns_sbh
[0] = nilfs
->ns_sbh
[1];
395 nilfs
->ns_sbp
[0] = nilfs
->ns_sbp
[1];
396 nilfs
->ns_sbh
[1] = tsbh
;
397 nilfs
->ns_sbp
[1] = tsbp
;
400 static int nilfs_load_super_block(struct the_nilfs
*nilfs
,
401 struct super_block
*sb
, int blocksize
,
402 struct nilfs_super_block
**sbpp
)
404 struct nilfs_super_block
**sbp
= nilfs
->ns_sbp
;
405 struct buffer_head
**sbh
= nilfs
->ns_sbh
;
406 u64 sb2off
= NILFS_SB2_OFFSET_BYTES(nilfs
->ns_bdev
->bd_inode
->i_size
);
407 int valid
[2], swp
= 0;
409 sbp
[0] = nilfs_read_super_block(sb
, NILFS_SB_OFFSET_BYTES
, blocksize
,
411 sbp
[1] = nilfs_read_super_block(sb
, sb2off
, blocksize
, &sbh
[1]);
415 printk(KERN_ERR
"NILFS: unable to read superblock\n");
419 "NILFS warning: unable to read primary superblock\n");
422 "NILFS warning: unable to read secondary superblock\n");
424 valid
[0] = nilfs_valid_sb(sbp
[0]);
425 valid
[1] = nilfs_valid_sb(sbp
[1]);
428 le64_to_cpu(sbp
[1]->s_wtime
) > le64_to_cpu(sbp
[0]->s_wtime
));
430 if (valid
[swp
] && nilfs_sb2_bad_offset(sbp
[swp
], sb2off
)) {
437 nilfs_release_super_block(nilfs
);
438 printk(KERN_ERR
"NILFS: Can't find nilfs on dev %s.\n",
444 printk(KERN_WARNING
"NILFS warning: broken superblock. "
445 "using spare superblock.\n");
446 nilfs_swap_super_block(nilfs
);
449 nilfs
->ns_sbwtime
[0] = le64_to_cpu(sbp
[0]->s_wtime
);
450 nilfs
->ns_sbwtime
[1] = valid
[!swp
] ? le64_to_cpu(sbp
[1]->s_wtime
) : 0;
451 nilfs
->ns_prot_seq
= le64_to_cpu(sbp
[valid
[1] & !swp
]->s_last_seq
);
457 * init_nilfs - initialize a NILFS instance.
458 * @nilfs: the_nilfs structure
459 * @sbi: nilfs_sb_info
461 * @data: mount options
463 * init_nilfs() performs common initialization per block device (e.g.
464 * reading the super block, getting disk layout information, initializing
465 * shared fields in the_nilfs). It takes on some portion of the jobs
466 * typically done by a fill_super() routine. This division arises from
467 * the nature that multiple NILFS instances may be simultaneously
468 * mounted on a device.
469 * For multiple mounts on the same device, only the first mount
470 * invokes these tasks.
472 * Return Value: On success, 0 is returned. On error, a negative error
475 int init_nilfs(struct the_nilfs
*nilfs
, struct nilfs_sb_info
*sbi
, char *data
)
477 struct super_block
*sb
= sbi
->s_super
;
478 struct nilfs_super_block
*sbp
;
479 struct backing_dev_info
*bdi
;
483 down_write(&nilfs
->ns_sem
);
484 if (nilfs_init(nilfs
)) {
485 /* Load values from existing the_nilfs */
486 sbp
= nilfs
->ns_sbp
[0];
487 err
= nilfs_store_magic_and_option(sb
, sbp
, data
);
491 blocksize
= BLOCK_SIZE
<< le32_to_cpu(sbp
->s_log_block_size
);
492 if (sb
->s_blocksize
!= blocksize
&&
493 !sb_set_blocksize(sb
, blocksize
)) {
494 printk(KERN_ERR
"NILFS: blocksize %d unfit to device\n",
498 sb
->s_maxbytes
= nilfs_max_size(sb
->s_blocksize_bits
);
502 blocksize
= sb_min_blocksize(sb
, BLOCK_SIZE
);
504 printk(KERN_ERR
"NILFS: unable to set blocksize\n");
508 err
= nilfs_load_super_block(nilfs
, sb
, blocksize
, &sbp
);
512 err
= nilfs_store_magic_and_option(sb
, sbp
, data
);
516 blocksize
= BLOCK_SIZE
<< le32_to_cpu(sbp
->s_log_block_size
);
517 if (sb
->s_blocksize
!= blocksize
) {
518 int hw_blocksize
= bdev_hardsect_size(sb
->s_bdev
);
520 if (blocksize
< hw_blocksize
) {
522 "NILFS: blocksize %d too small for device "
523 "(sector-size = %d).\n",
524 blocksize
, hw_blocksize
);
528 nilfs_release_super_block(nilfs
);
529 sb_set_blocksize(sb
, blocksize
);
531 err
= nilfs_load_super_block(nilfs
, sb
, blocksize
, &sbp
);
534 /* not failed_sbh; sbh is released automatically
535 when reloading fails. */
537 nilfs
->ns_blocksize_bits
= sb
->s_blocksize_bits
;
539 err
= nilfs_store_disk_layout(nilfs
, sbp
);
543 sb
->s_maxbytes
= nilfs_max_size(sb
->s_blocksize_bits
);
545 nilfs
->ns_mount_state
= le16_to_cpu(sbp
->s_state
);
547 bdi
= nilfs
->ns_bdev
->bd_inode_backing_dev_info
;
549 bdi
= nilfs
->ns_bdev
->bd_inode
->i_mapping
->backing_dev_info
;
550 nilfs
->ns_bdi
= bdi
? : &default_backing_dev_info
;
552 /* Finding last segment */
553 nilfs
->ns_last_pseg
= le64_to_cpu(sbp
->s_last_pseg
);
554 nilfs
->ns_last_cno
= le64_to_cpu(sbp
->s_last_cno
);
555 nilfs
->ns_last_seq
= le64_to_cpu(sbp
->s_last_seq
);
557 nilfs
->ns_seg_seq
= nilfs
->ns_last_seq
;
559 nilfs_get_segnum_of_block(nilfs
, nilfs
->ns_last_pseg
);
560 nilfs
->ns_cno
= nilfs
->ns_last_cno
+ 1;
561 if (nilfs
->ns_segnum
>= nilfs
->ns_nsegments
) {
562 printk(KERN_ERR
"NILFS invalid last segment number.\n");
567 nilfs
->ns_free_segments_count
=
568 nilfs
->ns_nsegments
- (nilfs
->ns_segnum
+ 1);
570 /* Initialize gcinode cache */
571 err
= nilfs_init_gccache(nilfs
);
575 set_nilfs_init(nilfs
);
578 up_write(&nilfs
->ns_sem
);
582 nilfs_release_super_block(nilfs
);
586 int nilfs_count_free_blocks(struct the_nilfs
*nilfs
, sector_t
*nblocks
)
588 struct inode
*dat
= nilfs_dat_inode(nilfs
);
589 unsigned long ncleansegs
;
592 down_read(&NILFS_MDT(dat
)->mi_sem
); /* XXX */
593 err
= nilfs_sufile_get_ncleansegs(nilfs
->ns_sufile
, &ncleansegs
);
594 up_read(&NILFS_MDT(dat
)->mi_sem
); /* XXX */
596 *nblocks
= (sector_t
)ncleansegs
* nilfs
->ns_blocks_per_segment
;
600 int nilfs_near_disk_full(struct the_nilfs
*nilfs
)
602 struct inode
*sufile
= nilfs
->ns_sufile
;
603 unsigned long ncleansegs
, nincsegs
;
606 ret
= nilfs_sufile_get_ncleansegs(sufile
, &ncleansegs
);
608 nincsegs
= atomic_read(&nilfs
->ns_ndirtyblks
) /
609 nilfs
->ns_blocks_per_segment
+ 1;
610 if (ncleansegs
<= nilfs
->ns_nrsvsegs
+ nincsegs
)
616 int nilfs_checkpoint_is_mounted(struct the_nilfs
*nilfs
, __u64 cno
,
619 struct nilfs_sb_info
*sbi
;
622 down_read(&nilfs
->ns_sem
);
623 if (cno
== 0 || cno
> nilfs
->ns_cno
)
626 list_for_each_entry(sbi
, &nilfs
->ns_supers
, s_list
) {
627 if (sbi
->s_snapshot_cno
== cno
&&
628 (!snapshot_mount
|| nilfs_test_opt(sbi
, SNAPSHOT
))) {
629 /* exclude read-only mounts */
634 /* for protecting recent checkpoints */
635 if (cno
>= nilfs_last_cno(nilfs
))
639 up_read(&nilfs
->ns_sem
);