/*
 * the_nilfs.c - the_nilfs shared structure.
 *
 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 *
 * Written by Ryusuke Konishi <ryusuke@osrg.net>
 *
 */
24 #include <linux/buffer_head.h>
25 #include <linux/slab.h>
26 #include <linux/blkdev.h>
27 #include <linux/backing-dev.h>
28 #include <linux/crc32.h>
38 void nilfs_set_last_segment(struct the_nilfs
*nilfs
,
39 sector_t start_blocknr
, u64 seq
, __u64 cno
)
41 spin_lock(&nilfs
->ns_last_segment_lock
);
42 nilfs
->ns_last_pseg
= start_blocknr
;
43 nilfs
->ns_last_seq
= seq
;
44 nilfs
->ns_last_cno
= cno
;
45 spin_unlock(&nilfs
->ns_last_segment_lock
);
49 * alloc_nilfs - allocate the_nilfs structure
50 * @bdev: block device to which the_nilfs is related
52 * alloc_nilfs() allocates memory for the_nilfs and
53 * initializes its reference count and locks.
55 * Return Value: On success, pointer to the_nilfs is returned.
56 * On error, NULL is returned.
58 struct the_nilfs
*alloc_nilfs(struct block_device
*bdev
)
60 struct the_nilfs
*nilfs
;
62 nilfs
= kzalloc(sizeof(*nilfs
), GFP_KERNEL
);
66 nilfs
->ns_bdev
= bdev
;
67 atomic_set(&nilfs
->ns_count
, 1);
68 atomic_set(&nilfs
->ns_writer_refcount
, -1);
69 atomic_set(&nilfs
->ns_ndirtyblks
, 0);
70 init_rwsem(&nilfs
->ns_sem
);
71 mutex_init(&nilfs
->ns_writer_mutex
);
72 INIT_LIST_HEAD(&nilfs
->ns_supers
);
73 spin_lock_init(&nilfs
->ns_last_segment_lock
);
74 nilfs
->ns_gc_inodes_h
= NULL
;
75 init_rwsem(&nilfs
->ns_segctor_sem
);
81 * put_nilfs - release a reference to the_nilfs
82 * @nilfs: the_nilfs structure to be released
84 * put_nilfs() decrements a reference counter of the_nilfs.
85 * If the reference count reaches zero, the_nilfs is freed.
87 void put_nilfs(struct the_nilfs
*nilfs
)
89 if (!atomic_dec_and_test(&nilfs
->ns_count
))
92 * Increment of ns_count never occur below because the caller
93 * of get_nilfs() holds at least one reference to the_nilfs.
94 * Thus its exclusion control is not required here.
97 if (nilfs_loaded(nilfs
)) {
98 nilfs_mdt_clear(nilfs
->ns_sufile
);
99 nilfs_mdt_destroy(nilfs
->ns_sufile
);
100 nilfs_mdt_clear(nilfs
->ns_cpfile
);
101 nilfs_mdt_destroy(nilfs
->ns_cpfile
);
102 nilfs_mdt_clear(nilfs
->ns_dat
);
103 nilfs_mdt_destroy(nilfs
->ns_dat
);
104 /* XXX: how and when to clear nilfs->ns_gc_dat? */
105 nilfs_mdt_destroy(nilfs
->ns_gc_dat
);
107 if (nilfs_init(nilfs
)) {
108 nilfs_destroy_gccache(nilfs
);
109 brelse(nilfs
->ns_sbh
[0]);
110 brelse(nilfs
->ns_sbh
[1]);
115 static int nilfs_load_super_root(struct the_nilfs
*nilfs
,
116 struct nilfs_sb_info
*sbi
, sector_t sr_block
)
118 struct buffer_head
*bh_sr
;
119 struct nilfs_super_root
*raw_sr
;
120 struct nilfs_super_block
**sbp
= nilfs
->ns_sbp
;
121 unsigned dat_entry_size
, segment_usage_size
, checkpoint_size
;
125 err
= nilfs_read_super_root_block(sbi
->s_super
, sr_block
, &bh_sr
, 1);
129 down_read(&nilfs
->ns_sem
);
130 dat_entry_size
= le16_to_cpu(sbp
[0]->s_dat_entry_size
);
131 checkpoint_size
= le16_to_cpu(sbp
[0]->s_checkpoint_size
);
132 segment_usage_size
= le16_to_cpu(sbp
[0]->s_segment_usage_size
);
133 up_read(&nilfs
->ns_sem
);
135 inode_size
= nilfs
->ns_inode_size
;
138 nilfs
->ns_dat
= nilfs_mdt_new(
139 nilfs
, NULL
, NILFS_DAT_INO
, NILFS_DAT_GFP
);
140 if (unlikely(!nilfs
->ns_dat
))
143 nilfs
->ns_gc_dat
= nilfs_mdt_new(
144 nilfs
, NULL
, NILFS_DAT_INO
, NILFS_DAT_GFP
);
145 if (unlikely(!nilfs
->ns_gc_dat
))
148 nilfs
->ns_cpfile
= nilfs_mdt_new(
149 nilfs
, NULL
, NILFS_CPFILE_INO
, NILFS_CPFILE_GFP
);
150 if (unlikely(!nilfs
->ns_cpfile
))
153 nilfs
->ns_sufile
= nilfs_mdt_new(
154 nilfs
, NULL
, NILFS_SUFILE_INO
, NILFS_SUFILE_GFP
);
155 if (unlikely(!nilfs
->ns_sufile
))
158 err
= nilfs_palloc_init_blockgroup(nilfs
->ns_dat
, dat_entry_size
);
162 err
= nilfs_palloc_init_blockgroup(nilfs
->ns_gc_dat
, dat_entry_size
);
166 nilfs_mdt_set_shadow(nilfs
->ns_dat
, nilfs
->ns_gc_dat
);
167 nilfs_mdt_set_entry_size(nilfs
->ns_cpfile
, checkpoint_size
,
168 sizeof(struct nilfs_cpfile_header
));
169 nilfs_mdt_set_entry_size(nilfs
->ns_sufile
, segment_usage_size
,
170 sizeof(struct nilfs_sufile_header
));
172 err
= nilfs_mdt_read_inode_direct(
173 nilfs
->ns_dat
, bh_sr
, NILFS_SR_DAT_OFFSET(inode_size
));
177 err
= nilfs_mdt_read_inode_direct(
178 nilfs
->ns_cpfile
, bh_sr
, NILFS_SR_CPFILE_OFFSET(inode_size
));
182 err
= nilfs_mdt_read_inode_direct(
183 nilfs
->ns_sufile
, bh_sr
, NILFS_SR_SUFILE_OFFSET(inode_size
));
187 raw_sr
= (struct nilfs_super_root
*)bh_sr
->b_data
;
188 nilfs
->ns_nongc_ctime
= le64_to_cpu(raw_sr
->sr_nongc_ctime
);
195 nilfs_mdt_destroy(nilfs
->ns_sufile
);
198 nilfs_mdt_destroy(nilfs
->ns_cpfile
);
201 nilfs_mdt_destroy(nilfs
->ns_gc_dat
);
204 nilfs_mdt_destroy(nilfs
->ns_dat
);
208 static void nilfs_init_recovery_info(struct nilfs_recovery_info
*ri
)
210 memset(ri
, 0, sizeof(*ri
));
211 INIT_LIST_HEAD(&ri
->ri_used_segments
);
214 static void nilfs_clear_recovery_info(struct nilfs_recovery_info
*ri
)
216 nilfs_dispose_segment_list(&ri
->ri_used_segments
);
220 * load_nilfs - load and recover the nilfs
221 * @nilfs: the_nilfs structure to be released
222 * @sbi: nilfs_sb_info used to recover past segment
224 * load_nilfs() searches and load the latest super root,
225 * attaches the last segment, and does recovery if needed.
226 * The caller must call this exclusively for simultaneous mounts.
228 int load_nilfs(struct the_nilfs
*nilfs
, struct nilfs_sb_info
*sbi
)
230 struct nilfs_recovery_info ri
;
231 unsigned int s_flags
= sbi
->s_super
->s_flags
;
232 int really_read_only
= bdev_read_only(nilfs
->ns_bdev
);
236 nilfs_init_recovery_info(&ri
);
238 down_write(&nilfs
->ns_sem
);
239 valid_fs
= (nilfs
->ns_mount_state
& NILFS_VALID_FS
);
240 up_write(&nilfs
->ns_sem
);
242 if (!valid_fs
&& (s_flags
& MS_RDONLY
)) {
243 printk(KERN_INFO
"NILFS: INFO: recovery "
244 "required for readonly filesystem.\n");
245 if (really_read_only
) {
246 printk(KERN_ERR
"NILFS: write access "
247 "unavailable, cannot proceed.\n");
251 printk(KERN_INFO
"NILFS: write access will "
252 "be enabled during recovery.\n");
253 sbi
->s_super
->s_flags
&= ~MS_RDONLY
;
256 err
= nilfs_search_super_root(nilfs
, sbi
, &ri
);
258 printk(KERN_ERR
"NILFS: error searching super root.\n");
262 err
= nilfs_load_super_root(nilfs
, sbi
, ri
.ri_super_root
);
264 printk(KERN_ERR
"NILFS: error loading super root.\n");
269 err
= nilfs_recover_logical_segments(nilfs
, sbi
, &ri
);
271 nilfs_mdt_destroy(nilfs
->ns_cpfile
);
272 nilfs_mdt_destroy(nilfs
->ns_sufile
);
273 nilfs_mdt_destroy(nilfs
->ns_dat
);
276 if (ri
.ri_need_recovery
== NILFS_RECOVERY_SR_UPDATED
)
277 sbi
->s_super
->s_dirt
= 1;
280 set_nilfs_loaded(nilfs
);
283 nilfs_clear_recovery_info(&ri
);
284 sbi
->s_super
->s_flags
= s_flags
;
288 static unsigned long long nilfs_max_size(unsigned int blkbits
)
290 unsigned int max_bits
;
291 unsigned long long res
= MAX_LFS_FILESIZE
; /* page cache limit */
293 max_bits
= blkbits
+ NILFS_BMAP_KEY_BIT
; /* bmap size limit */
295 res
= min_t(unsigned long long, res
, (1ULL << max_bits
) - 1);
299 static int nilfs_store_disk_layout(struct the_nilfs
*nilfs
,
300 struct nilfs_super_block
*sbp
)
302 if (le32_to_cpu(sbp
->s_rev_level
) != NILFS_CURRENT_REV
) {
303 printk(KERN_ERR
"NILFS: revision mismatch "
304 "(superblock rev.=%d.%d, current rev.=%d.%d). "
305 "Please check the version of mkfs.nilfs.\n",
306 le32_to_cpu(sbp
->s_rev_level
),
307 le16_to_cpu(sbp
->s_minor_rev_level
),
308 NILFS_CURRENT_REV
, NILFS_MINOR_REV
);
311 nilfs
->ns_sbsize
= le16_to_cpu(sbp
->s_bytes
);
312 if (nilfs
->ns_sbsize
> BLOCK_SIZE
)
315 nilfs
->ns_inode_size
= le16_to_cpu(sbp
->s_inode_size
);
316 nilfs
->ns_first_ino
= le32_to_cpu(sbp
->s_first_ino
);
318 nilfs
->ns_blocks_per_segment
= le32_to_cpu(sbp
->s_blocks_per_segment
);
319 if (nilfs
->ns_blocks_per_segment
< NILFS_SEG_MIN_BLOCKS
) {
320 printk(KERN_ERR
"NILFS: too short segment. \n");
324 nilfs
->ns_first_data_block
= le64_to_cpu(sbp
->s_first_data_block
);
325 nilfs
->ns_nsegments
= le64_to_cpu(sbp
->s_nsegments
);
326 nilfs
->ns_r_segments_percentage
=
327 le32_to_cpu(sbp
->s_r_segments_percentage
);
329 max_t(unsigned long, NILFS_MIN_NRSVSEGS
,
330 DIV_ROUND_UP(nilfs
->ns_nsegments
*
331 nilfs
->ns_r_segments_percentage
, 100));
332 nilfs
->ns_crc_seed
= le32_to_cpu(sbp
->s_crc_seed
);
336 static int nilfs_valid_sb(struct nilfs_super_block
*sbp
)
338 static unsigned char sum
[4];
339 const int sumoff
= offsetof(struct nilfs_super_block
, s_sum
);
343 if (!sbp
|| le16_to_cpu(sbp
->s_magic
) != NILFS_SUPER_MAGIC
)
345 bytes
= le16_to_cpu(sbp
->s_bytes
);
346 if (bytes
> BLOCK_SIZE
)
348 crc
= crc32_le(le32_to_cpu(sbp
->s_crc_seed
), (unsigned char *)sbp
,
350 crc
= crc32_le(crc
, sum
, 4);
351 crc
= crc32_le(crc
, (unsigned char *)sbp
+ sumoff
+ 4,
353 return crc
== le32_to_cpu(sbp
->s_sum
);
356 static int nilfs_sb2_bad_offset(struct nilfs_super_block
*sbp
, u64 offset
)
358 return offset
< ((le64_to_cpu(sbp
->s_nsegments
) *
359 le32_to_cpu(sbp
->s_blocks_per_segment
)) <<
360 (le32_to_cpu(sbp
->s_log_block_size
) + 10));
363 static void nilfs_release_super_block(struct the_nilfs
*nilfs
)
367 for (i
= 0; i
< 2; i
++) {
368 if (nilfs
->ns_sbp
[i
]) {
369 brelse(nilfs
->ns_sbh
[i
]);
370 nilfs
->ns_sbh
[i
] = NULL
;
371 nilfs
->ns_sbp
[i
] = NULL
;
376 void nilfs_fall_back_super_block(struct the_nilfs
*nilfs
)
378 brelse(nilfs
->ns_sbh
[0]);
379 nilfs
->ns_sbh
[0] = nilfs
->ns_sbh
[1];
380 nilfs
->ns_sbp
[0] = nilfs
->ns_sbp
[1];
381 nilfs
->ns_sbh
[1] = NULL
;
382 nilfs
->ns_sbp
[1] = NULL
;
385 void nilfs_swap_super_block(struct the_nilfs
*nilfs
)
387 struct buffer_head
*tsbh
= nilfs
->ns_sbh
[0];
388 struct nilfs_super_block
*tsbp
= nilfs
->ns_sbp
[0];
390 nilfs
->ns_sbh
[0] = nilfs
->ns_sbh
[1];
391 nilfs
->ns_sbp
[0] = nilfs
->ns_sbp
[1];
392 nilfs
->ns_sbh
[1] = tsbh
;
393 nilfs
->ns_sbp
[1] = tsbp
;
396 static int nilfs_load_super_block(struct the_nilfs
*nilfs
,
397 struct super_block
*sb
, int blocksize
,
398 struct nilfs_super_block
**sbpp
)
400 struct nilfs_super_block
**sbp
= nilfs
->ns_sbp
;
401 struct buffer_head
**sbh
= nilfs
->ns_sbh
;
402 u64 sb2off
= NILFS_SB2_OFFSET_BYTES(nilfs
->ns_bdev
->bd_inode
->i_size
);
403 int valid
[2], swp
= 0;
405 sbp
[0] = nilfs_read_super_block(sb
, NILFS_SB_OFFSET_BYTES
, blocksize
,
407 sbp
[1] = nilfs_read_super_block(sb
, sb2off
, blocksize
, &sbh
[1]);
411 printk(KERN_ERR
"NILFS: unable to read superblock\n");
415 "NILFS warning: unable to read primary superblock\n");
418 "NILFS warning: unable to read secondary superblock\n");
420 valid
[0] = nilfs_valid_sb(sbp
[0]);
421 valid
[1] = nilfs_valid_sb(sbp
[1]);
424 le64_to_cpu(sbp
[1]->s_wtime
) > le64_to_cpu(sbp
[0]->s_wtime
));
426 if (valid
[swp
] && nilfs_sb2_bad_offset(sbp
[swp
], sb2off
)) {
433 nilfs_release_super_block(nilfs
);
434 printk(KERN_ERR
"NILFS: Can't find nilfs on dev %s.\n",
440 printk(KERN_WARNING
"NILFS warning: broken superblock. "
441 "using spare superblock.\n");
442 nilfs_swap_super_block(nilfs
);
445 nilfs
->ns_sbwtime
[0] = le64_to_cpu(sbp
[0]->s_wtime
);
446 nilfs
->ns_sbwtime
[1] = valid
[!swp
] ? le64_to_cpu(sbp
[1]->s_wtime
) : 0;
447 nilfs
->ns_prot_seq
= le64_to_cpu(sbp
[valid
[1] & !swp
]->s_last_seq
);
453 * init_nilfs - initialize a NILFS instance.
454 * @nilfs: the_nilfs structure
455 * @sbi: nilfs_sb_info
457 * @data: mount options
459 * init_nilfs() performs common initialization per block device (e.g.
460 * reading the super block, getting disk layout information, initializing
461 * shared fields in the_nilfs). It takes on some portion of the jobs
462 * typically done by a fill_super() routine. This division arises from
463 * the nature that multiple NILFS instances may be simultaneously
464 * mounted on a device.
465 * For multiple mounts on the same device, only the first mount
466 * invokes these tasks.
468 * Return Value: On success, 0 is returned. On error, a negative error
471 int init_nilfs(struct the_nilfs
*nilfs
, struct nilfs_sb_info
*sbi
, char *data
)
473 struct super_block
*sb
= sbi
->s_super
;
474 struct nilfs_super_block
*sbp
;
475 struct backing_dev_info
*bdi
;
479 down_write(&nilfs
->ns_sem
);
480 if (nilfs_init(nilfs
)) {
481 /* Load values from existing the_nilfs */
482 sbp
= nilfs
->ns_sbp
[0];
483 err
= nilfs_store_magic_and_option(sb
, sbp
, data
);
487 blocksize
= BLOCK_SIZE
<< le32_to_cpu(sbp
->s_log_block_size
);
488 if (sb
->s_blocksize
!= blocksize
&&
489 !sb_set_blocksize(sb
, blocksize
)) {
490 printk(KERN_ERR
"NILFS: blocksize %d unfit to device\n",
494 sb
->s_maxbytes
= nilfs_max_size(sb
->s_blocksize_bits
);
498 blocksize
= sb_min_blocksize(sb
, BLOCK_SIZE
);
500 printk(KERN_ERR
"NILFS: unable to set blocksize\n");
504 err
= nilfs_load_super_block(nilfs
, sb
, blocksize
, &sbp
);
508 err
= nilfs_store_magic_and_option(sb
, sbp
, data
);
512 blocksize
= BLOCK_SIZE
<< le32_to_cpu(sbp
->s_log_block_size
);
513 if (sb
->s_blocksize
!= blocksize
) {
514 int hw_blocksize
= bdev_hardsect_size(sb
->s_bdev
);
516 if (blocksize
< hw_blocksize
) {
518 "NILFS: blocksize %d too small for device "
519 "(sector-size = %d).\n",
520 blocksize
, hw_blocksize
);
524 nilfs_release_super_block(nilfs
);
525 sb_set_blocksize(sb
, blocksize
);
527 err
= nilfs_load_super_block(nilfs
, sb
, blocksize
, &sbp
);
530 /* not failed_sbh; sbh is released automatically
531 when reloading fails. */
533 nilfs
->ns_blocksize_bits
= sb
->s_blocksize_bits
;
535 err
= nilfs_store_disk_layout(nilfs
, sbp
);
539 sb
->s_maxbytes
= nilfs_max_size(sb
->s_blocksize_bits
);
541 nilfs
->ns_mount_state
= le16_to_cpu(sbp
->s_state
);
543 bdi
= nilfs
->ns_bdev
->bd_inode_backing_dev_info
;
545 bdi
= nilfs
->ns_bdev
->bd_inode
->i_mapping
->backing_dev_info
;
546 nilfs
->ns_bdi
= bdi
? : &default_backing_dev_info
;
548 /* Finding last segment */
549 nilfs
->ns_last_pseg
= le64_to_cpu(sbp
->s_last_pseg
);
550 nilfs
->ns_last_cno
= le64_to_cpu(sbp
->s_last_cno
);
551 nilfs
->ns_last_seq
= le64_to_cpu(sbp
->s_last_seq
);
553 nilfs
->ns_seg_seq
= nilfs
->ns_last_seq
;
555 nilfs_get_segnum_of_block(nilfs
, nilfs
->ns_last_pseg
);
556 nilfs
->ns_cno
= nilfs
->ns_last_cno
+ 1;
557 if (nilfs
->ns_segnum
>= nilfs
->ns_nsegments
) {
558 printk(KERN_ERR
"NILFS invalid last segment number.\n");
563 nilfs
->ns_free_segments_count
=
564 nilfs
->ns_nsegments
- (nilfs
->ns_segnum
+ 1);
566 /* Initialize gcinode cache */
567 err
= nilfs_init_gccache(nilfs
);
571 set_nilfs_init(nilfs
);
574 up_write(&nilfs
->ns_sem
);
578 nilfs_release_super_block(nilfs
);
582 int nilfs_count_free_blocks(struct the_nilfs
*nilfs
, sector_t
*nblocks
)
584 struct inode
*dat
= nilfs_dat_inode(nilfs
);
585 unsigned long ncleansegs
;
588 down_read(&NILFS_MDT(dat
)->mi_sem
); /* XXX */
589 err
= nilfs_sufile_get_ncleansegs(nilfs
->ns_sufile
, &ncleansegs
);
590 up_read(&NILFS_MDT(dat
)->mi_sem
); /* XXX */
592 *nblocks
= (sector_t
)ncleansegs
* nilfs
->ns_blocks_per_segment
;
596 int nilfs_near_disk_full(struct the_nilfs
*nilfs
)
598 struct inode
*sufile
= nilfs
->ns_sufile
;
599 unsigned long ncleansegs
, nincsegs
;
602 ret
= nilfs_sufile_get_ncleansegs(sufile
, &ncleansegs
);
604 nincsegs
= atomic_read(&nilfs
->ns_ndirtyblks
) /
605 nilfs
->ns_blocks_per_segment
+ 1;
606 if (ncleansegs
<= nilfs
->ns_nrsvsegs
+ nincsegs
)
612 int nilfs_checkpoint_is_mounted(struct the_nilfs
*nilfs
, __u64 cno
,
615 struct nilfs_sb_info
*sbi
;
618 down_read(&nilfs
->ns_sem
);
619 if (cno
== 0 || cno
> nilfs
->ns_cno
)
622 list_for_each_entry(sbi
, &nilfs
->ns_supers
, s_list
) {
623 if (sbi
->s_snapshot_cno
== cno
&&
624 (!snapshot_mount
|| nilfs_test_opt(sbi
, SNAPSHOT
))) {
625 /* exclude read-only mounts */
630 /* for protecting recent checkpoints */
631 if (cno
>= nilfs_last_cno(nilfs
))
635 up_read(&nilfs
->ns_sem
);