Pull in fs-jbd2-journalc-kmalloc-memset-conversion-to-kzalloc.patch from -mm
[ext4-patch-queue.git] / uninitialized-block-groups.patch
blobf08f14bc4f1007eb6864dac15da295be48e9304c
1 Ext4: Uninitialized Block Groups
3 From: Andreas Dilger <adilger@clusterfs.com>
5 In pass1 of e2fsck, every inode table in the filesystem is scanned and checked,
6 regardless of whether it is in use. This is the most time consuming part
7 of the filesystem check. The uninitialized block group feature can greatly
8 reduce e2fsck time by eliminating checking of uninitialized inodes.
10 With this feature, there is a high water mark of used inodes for each block
11 group. Block and inode bitmaps can be uninitialized on disk via a flag in the
12 group descriptor to avoid reading or scanning them at e2fsck time. A checksum
13 of each group descriptor is used to ensure that corruption in the group
14 descriptor's bit flags does not cause incorrect operation.
16 The feature is enabled through a mkfs option
18 mke2fs /dev/<device> -O uninit_groups
20 A patch adding support for uninitialized block groups to e2fsprogs tools has
21 been posted to the linux-ext4 mailing list.
23 The patches have been stress tested with fsstress and fsx. In performance
24 tests testing e2fsck time, we have seen that e2fsck time on ext3 grows
25 linearly with the total number of inodes in the filesystem. In ext4 with the
26 uninitialized block groups feature, the e2fsck time is constant, based
27 solely on the number of used inodes rather than the total inode count.
28 Since typical ext4 filesystems only use 1-10% of their inodes, this feature can
29 greatly reduce e2fsck time for users, with a performance improvement of 2-20
30 times, depending on how full the filesystem is.
32 The attached graph shows the major improvements in e2fsck times in filesystems
33 with a large total inode count, but few inodes in use.
35 In each group descriptor if we have
37 EXT4_BG_INODE_UNINIT set in bg_flags:
38 Inode table is not initialized/used in this group. So we can skip
39 the consistency check during fsck.
40 EXT4_BG_BLOCK_UNINIT set in bg_flags:
41 No block in the group is used. So we can skip the block bitmap
42 verification for this group.
44 We also add two new fields to group descriptor as a part of
45 uninitialized group patch.
47 __le16 bg_itable_unused; /* Unused inodes count */
48 __le16 bg_checksum; /* crc16(sb_uuid+group+desc) */
51 bg_itable_unused:
53 If EXT4_BG_INODE_UNINIT is not set in bg_flags, then
54 bg_itable_unused gives the number of unused inodes at the end of
55 the inode table, past the last inode that is in use. This can be
56 used by fsck to skip the inodes that are marked unused.
59 bg_checksum:
60 Now that we depend on bg_flags and bg_itable_unused to determine
61 the block and inode usage, we need to make sure group descriptor
62 is not corrupt. We add a checksum to the group descriptor to
63 detect corruption. If the descriptor is found to be corrupt, we
64 mark all the blocks and inodes in the group as used.
67 Signed-off-by: Avantika Mathur <mathur@us.ibm.com>
68 Signed-off-by: Andreas Dilger <adilger@clusterfs.com>
69 Signed-off-by: Mingming Cao <cmm@us.ibm.com>
70 Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
71 ---
73 fs/Kconfig | 1
74 fs/ext4/balloc.c | 92 +++++++++++++++++++++++++++++-
75 fs/ext4/group.h | 29 +++++++++
76 fs/ext4/ialloc.c | 146 ++++++++++++++++++++++++++++++++++++++++++++---
77 fs/ext4/resize.c | 2 +
78 fs/ext4/super.c | 47 +++++++++++++++
79 include/linux/ext4_fs.h | 16 ++++-
80 7 files changed, 317 insertions(+), 16 deletions(-)
81 create mode 100644 fs/ext4/group.h
84 diff --git a/fs/Kconfig b/fs/Kconfig
85 index 58a0650..cb75d81 100644
86 --- a/fs/Kconfig
87 +++ b/fs/Kconfig
88 @@ -140,6 +140,7 @@ config EXT4DEV_FS
89 tristate "Ext4dev/ext4 extended fs support development (EXPERIMENTAL)"
90 depends on EXPERIMENTAL
91 select JBD2
92 + select CRC16
93 help
94 Ext4dev is a predecessor filesystem of the next generation
95 extended fs ext4, based on ext3 filesystem code. It will be
96 diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
97 index e53b4af..d1a8882 100644
98 --- a/fs/ext4/balloc.c
99 +++ b/fs/ext4/balloc.c
100 @@ -20,6 +20,7 @@
101 #include <linux/quotaops.h>
102 #include <linux/buffer_head.h>
104 +#include "group.h"
106 * balloc.c contains the blocks allocation and deallocation routines
108 @@ -42,6 +43,74 @@ void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr,
112 +/* Initializes an uninitialized block bitmap if given, and returns the
113 + * number of blocks free in the group. */
114 +unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
115 + int block_group, struct ext4_group_desc *gdp)
117 + unsigned long start;
118 + int bit, bit_max;
119 + unsigned free_blocks;
120 + struct ext4_sb_info *sbi = EXT4_SB(sb);
122 + if (bh) {
123 + J_ASSERT_BH(bh, buffer_locked(bh));
125 + /* If checksum is bad mark all blocks used to prevent allocation
126 + * essentially implementing a per-group read-only flag. */
127 + if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) {
128 + ext4_error(sb, __FUNCTION__,
129 + "Checksum bad for group %u\n", block_group);
130 + gdp->bg_free_blocks_count = 0;
131 + gdp->bg_free_inodes_count = 0;
132 + gdp->bg_itable_unused = 0;
133 + memset(bh->b_data, 0xff, sb->s_blocksize);
134 + return 0;
136 + memset(bh->b_data, 0, sb->s_blocksize);
139 + /* Check for superblock and gdt backups in this group */
140 + bit_max = ext4_bg_has_super(sb, block_group);
142 + if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG) ||
143 + block_group < le32_to_cpu(sbi->s_es->s_first_meta_bg) *
144 + sbi->s_desc_per_block) {
145 + if (bit_max) {
146 + bit_max += ext4_bg_num_gdb(sb, block_group);
147 + bit_max +=
148 + le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks);
150 + } else { /* For META_BG_BLOCK_GROUPS */
151 + int group_rel = (block_group -
152 + le32_to_cpu(sbi->s_es->s_first_meta_bg)) %
153 + EXT4_DESC_PER_BLOCK(sb);
154 + if (group_rel == 0 || group_rel == 1 ||
155 + (group_rel == EXT4_DESC_PER_BLOCK(sb) - 1))
156 + bit_max += 1;
159 + /* Last and first groups are always initialized */
160 + free_blocks = EXT4_BLOCKS_PER_GROUP(sb) - bit_max;
162 + if (bh) {
163 + for (bit = 0; bit < bit_max; bit++)
164 + ext4_set_bit(bit, bh->b_data);
166 + start = block_group * EXT4_BLOCKS_PER_GROUP(sb) +
167 + le32_to_cpu(sbi->s_es->s_first_data_block);
169 + /* Set bits for block and inode bitmaps, and inode table */
170 + ext4_set_bit(ext4_block_bitmap(sb, gdp) - start, bh->b_data);
171 + ext4_set_bit(ext4_inode_bitmap(sb, gdp) - start, bh->b_data);
172 + for (bit = le32_to_cpu(gdp->bg_inode_table) - start,
173 + bit_max = bit + sbi->s_itb_per_group; bit < bit_max; bit++)
174 + ext4_set_bit(bit, bh->b_data);
177 + return free_blocks - sbi->s_itb_per_group - 2;
181 * The free blocks are managed by bitmaps. A file system contains several
182 * blocks groups. Each group contains 1 bitmap block for blocks, 1 bitmap
183 @@ -110,16 +179,29 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb,
185 * Return buffer_head on success or NULL in case of failure.
187 -static struct buffer_head *
188 +struct buffer_head *
189 read_block_bitmap(struct super_block *sb, unsigned int block_group)
191 struct ext4_group_desc * desc;
192 struct buffer_head * bh = NULL;
194 - desc = ext4_get_group_desc (sb, block_group, NULL);
195 + desc = ext4_get_group_desc(sb, block_group, NULL);
196 if (!desc)
197 goto error_out;
198 - bh = sb_bread(sb, ext4_block_bitmap(sb, desc));
199 + if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
200 + bh = sb_getblk(sb, ext4_block_bitmap(sb, desc));
201 + if (!buffer_uptodate(bh)) {
202 + lock_buffer(bh);
203 + if (!buffer_uptodate(bh)) {
204 + ext4_init_block_bitmap(sb, bh, block_group,
205 + desc);
206 + set_buffer_uptodate(bh);
208 + unlock_buffer(bh);
210 + } else {
211 + bh = sb_bread(sb, ext4_block_bitmap(sb,desc));
213 if (!bh)
214 ext4_error (sb, "read_block_bitmap",
215 "Cannot read block bitmap - "
216 @@ -586,6 +668,7 @@ do_more:
217 desc->bg_free_blocks_count =
218 cpu_to_le16(le16_to_cpu(desc->bg_free_blocks_count) +
219 group_freed);
220 + desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc);
221 spin_unlock(sb_bgl_lock(sbi, block_group));
222 percpu_counter_mod(&sbi->s_freeblocks_counter, count);
224 @@ -1644,8 +1727,11 @@ allocated:
225 ret_block, goal_hits, goal_attempts);
227 spin_lock(sb_bgl_lock(sbi, group_no));
228 + if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))
229 + gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
230 gdp->bg_free_blocks_count =
231 cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count)-num);
232 + gdp->bg_checksum = ext4_group_desc_csum(sbi, group_no, gdp);
233 spin_unlock(sb_bgl_lock(sbi, group_no));
234 percpu_counter_mod(&sbi->s_freeblocks_counter, -num);
236 diff --git a/fs/ext4/group.h b/fs/ext4/group.h
237 new file mode 100644
238 index 0000000..9310979
239 --- /dev/null
240 +++ b/fs/ext4/group.h
241 @@ -0,0 +1,29 @@
243 + * linux/fs/ext4/group.h
245 + * Copyright (C) 2007 Cluster File Systems, Inc
247 + * Author: Andreas Dilger <adilger@clusterfs.com>
248 + */
250 +#ifndef _LINUX_EXT4_GROUP_H
251 +#define _LINUX_EXT4_GROUP_H
252 +#if defined(CONFIG_CRC16)
253 +#include <linux/crc16.h>
254 +#endif
256 +extern __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 group,
257 + struct ext4_group_desc *gdp);
258 +extern int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 group,
259 + struct ext4_group_desc *gdp);
260 +struct buffer_head *read_block_bitmap(struct super_block *sb,
261 + unsigned int block_group);
262 +extern unsigned ext4_init_block_bitmap(struct super_block *sb,
263 + struct buffer_head *bh, int group,
264 + struct ext4_group_desc *desc);
265 +#define ext4_free_blocks_after_init(sb, group, desc) \
266 + ext4_init_block_bitmap(sb, NULL, group, desc)
267 +extern unsigned ext4_init_inode_bitmap(struct super_block *sb,
268 + struct buffer_head *bh, int group,
269 + struct ext4_group_desc *desc);
270 +#endif /* _LINUX_EXT4_GROUP_H */
271 diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
272 index 427f830..601df13 100644
273 --- a/fs/ext4/ialloc.c
274 +++ b/fs/ext4/ialloc.c
275 @@ -28,6 +28,7 @@
277 #include "xattr.h"
278 #include "acl.h"
279 +#include "group.h"
282 * ialloc.c contains the inodes allocation and deallocation routines
283 @@ -43,6 +44,52 @@
284 * the free blocks count in the block.
288 + * To avoid calling the atomic setbit hundreds or thousands of times, we only
289 + * need to use it within a single byte (to ensure we get endianness right).
290 + * We can use memset for the rest of the bitmap as there are no other users.
291 + */
292 +static void mark_bitmap_end(int start_bit, int end_bit, char *bitmap)
294 + int i;
296 + if (start_bit >= end_bit)
297 + return;
299 + ext4_debug("mark end bits +%d through +%d used\n", start_bit, end_bit);
300 + for (i = start_bit; i < ((start_bit + 7) & ~7UL); i++)
301 + ext4_set_bit(i, bitmap);
302 + if (i < end_bit)
303 + memset(bitmap + (i >> 3), 0xff, (end_bit - i) >> 3);
306 +/* Initializes an uninitialized inode bitmap */
307 +unsigned ext4_init_inode_bitmap(struct super_block *sb,
308 + struct buffer_head *bh, int block_group,
309 + struct ext4_group_desc *gdp)
311 + struct ext4_sb_info *sbi = EXT4_SB(sb);
313 + J_ASSERT_BH(bh, buffer_locked(bh));
315 + /* If checksum is bad mark all blocks and inodes used to prevent
316 + * allocation, essentially implementing a per-group read-only flag. */
317 + if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) {
318 + ext4_error(sb, __FUNCTION__, "Checksum bad for group %u\n",
319 + block_group);
320 + gdp->bg_free_blocks_count = 0;
321 + gdp->bg_free_inodes_count = 0;
322 + gdp->bg_itable_unused = 0;
323 + memset(bh->b_data, 0xff, sb->s_blocksize);
324 + return 0;
327 + memset(bh->b_data, 0, (EXT4_INODES_PER_GROUP(sb) + 7) / 8);
328 + mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), EXT4_BLOCKS_PER_GROUP(sb),
329 + bh->b_data);
331 + return EXT4_INODES_PER_GROUP(sb);
335 * Read the inode allocation bitmap for a given block_group, reading
336 @@ -59,8 +106,20 @@ read_inode_bitmap(struct super_block * sb, unsigned long block_group)
337 desc = ext4_get_group_desc(sb, block_group, NULL);
338 if (!desc)
339 goto error_out;
341 - bh = sb_bread(sb, ext4_inode_bitmap(sb, desc));
342 + if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
343 + bh = sb_getblk(sb, ext4_inode_bitmap(sb, desc));
344 + if (!buffer_uptodate(bh)) {
345 + lock_buffer(bh);
346 + if (!buffer_uptodate(bh)) {
347 + ext4_init_inode_bitmap(sb, bh, block_group,
348 + desc);
349 + set_buffer_uptodate(bh);
351 + unlock_buffer(bh);
353 + } else {
354 + bh = sb_bread(sb, ext4_inode_bitmap(sb, desc));
356 if (!bh)
357 ext4_error(sb, "read_inode_bitmap",
358 "Cannot read inode bitmap - "
359 @@ -169,6 +228,8 @@ void ext4_free_inode (handle_t *handle, struct inode * inode)
360 if (is_directory)
361 gdp->bg_used_dirs_count = cpu_to_le16(
362 le16_to_cpu(gdp->bg_used_dirs_count) - 1);
363 + gdp->bg_checksum = ext4_group_desc_csum(sbi,
364 + block_group, gdp);
365 spin_unlock(sb_bgl_lock(sbi, block_group));
366 percpu_counter_inc(&sbi->s_freeinodes_counter);
367 if (is_directory)
368 @@ -438,7 +499,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode * dir, int mode)
369 struct ext4_sb_info *sbi;
370 int err = 0;
371 struct inode *ret;
372 - int i;
373 + int i, free = 0;
375 /* Cannot create files in a deleted directory */
376 if (!dir || !dir->i_nlink)
377 @@ -520,11 +581,13 @@ repeat_in_this_group:
378 goto out;
380 got:
381 - ino += group * EXT4_INODES_PER_GROUP(sb) + 1;
382 - if (ino < EXT4_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) {
383 - ext4_error (sb, "ext4_new_inode",
384 - "reserved inode or inode > inodes count - "
385 - "block_group = %d, inode=%lu", group, ino);
386 + ino++;
387 + if ((group == 0 && ino < EXT4_FIRST_INO(sb)) ||
388 + ino > EXT4_INODES_PER_GROUP(sb)) {
389 + ext4_error(sb, __FUNCTION__,
390 + "reserved inode or inode > inodes count - "
391 + "block_group = %d, inode=%lu", group,
392 + ino + group * EXT4_INODES_PER_GROUP(sb));
393 err = -EIO;
394 goto fail;
396 @@ -532,13 +595,78 @@ got:
397 BUFFER_TRACE(bh2, "get_write_access");
398 err = ext4_journal_get_write_access(handle, bh2);
399 if (err) goto fail;
401 + /* We may have to initialize the block bitmap if it isn't already */
402 + if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM) &&
403 + gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
404 + struct buffer_head *block_bh = read_block_bitmap(sb, group);
406 + BUFFER_TRACE(block_bh, "get block bitmap access");
407 + err = ext4_journal_get_write_access(handle, block_bh);
408 + if (err) {
409 + brelse(block_bh);
410 + goto fail;
413 + free = 0;
414 + spin_lock(sb_bgl_lock(sbi, group));
415 + /* recheck and clear flag under lock if we still need to */
416 + if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
417 + gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
418 + free = ext4_free_blocks_after_init(sb, group, gdp);
419 + gdp->bg_free_blocks_count = cpu_to_le16(free);
421 + spin_unlock(sb_bgl_lock(sbi, group));
423 + /* Don't need to dirty bitmap block if we didn't change it */
424 + if (free) {
425 + BUFFER_TRACE(block_bh, "dirty block bitmap");
426 + err = ext4_journal_dirty_metadata(handle, block_bh);
429 + brelse(block_bh);
430 + if (err)
431 + goto fail;
434 spin_lock(sb_bgl_lock(sbi, group));
435 + /* If we didn't allocate from within the initialized part of the inode
436 + * table then we need to initialize up to this inode. */
437 + if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) {
438 + if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
439 + gdp->bg_flags &= cpu_to_le16(~EXT4_BG_INODE_UNINIT);
441 + /* When marking the block group with
442 + * ~EXT4_BG_INODE_UNINIT we don't want to depend
443 + * on the value of bg_itable_unused even though
444 + * mke2fs could have initialized the same for us.
445 + * Instead we calculate the value below
446 + */
448 + free = 0;
449 + } else {
450 + free = EXT4_INODES_PER_GROUP(sb) -
451 + le16_to_cpu(gdp->bg_itable_unused);
454 + /*
455 + * Check the relative inode number against the last used
456 + * relative inode number in this group. if it is greater
457 + * we need to update the bg_itable_unused count
459 + */
460 + if (ino > free)
461 + gdp->bg_itable_unused =
462 + cpu_to_le16(EXT4_INODES_PER_GROUP(sb) - ino);
465 gdp->bg_free_inodes_count =
466 cpu_to_le16(le16_to_cpu(gdp->bg_free_inodes_count) - 1);
467 if (S_ISDIR(mode)) {
468 gdp->bg_used_dirs_count =
469 cpu_to_le16(le16_to_cpu(gdp->bg_used_dirs_count) + 1);
471 + gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
472 spin_unlock(sb_bgl_lock(sbi, group));
473 BUFFER_TRACE(bh2, "call ext4_journal_dirty_metadata");
474 err = ext4_journal_dirty_metadata(handle, bh2);
475 @@ -560,7 +688,7 @@ got:
476 inode->i_gid = current->fsgid;
477 inode->i_mode = mode;
479 - inode->i_ino = ino;
480 + inode->i_ino = ino + group * EXT4_INODES_PER_GROUP(sb);
481 /* This is the optimal IO size (for stat), not the fs block size */
482 inode->i_blocks = 0;
483 inode->i_mtime = inode->i_atime = inode->i_ctime = ei->i_crtime =
484 diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
485 index aa11d7d..3359450 100644
486 --- a/fs/ext4/resize.c
487 +++ b/fs/ext4/resize.c
488 @@ -16,6 +16,7 @@
489 #include <linux/errno.h>
490 #include <linux/slab.h>
492 +#include "group.h"
494 #define outside(b, first, last) ((b) < (first) || (b) >= (last))
495 #define inside(b, first, last) ((b) >= (first) && (b) < (last))
496 @@ -842,6 +843,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
497 ext4_inode_table_set(sb, gdp, input->inode_table); /* LV FIXME */
498 gdp->bg_free_blocks_count = cpu_to_le16(input->free_blocks_count);
499 gdp->bg_free_inodes_count = cpu_to_le16(EXT4_INODES_PER_GROUP(sb));
500 + gdp->bg_checksum = ext4_group_desc_csum(sbi, input->group, gdp);
503 * Make the new blocks and inodes valid next. We do this before
504 diff --git a/fs/ext4/super.c b/fs/ext4/super.c
505 index 4550b83..43fa7f8 100644
506 --- a/fs/ext4/super.c
507 +++ b/fs/ext4/super.c
508 @@ -37,12 +37,14 @@
509 #include <linux/quotaops.h>
510 #include <linux/seq_file.h>
511 #include <linux/log2.h>
512 +#include <linux/crc16.h>
514 #include <asm/uaccess.h>
516 #include "xattr.h"
517 #include "acl.h"
518 #include "namei.h"
519 +#include "group.h"
521 static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
522 unsigned long journal_devnum);
523 @@ -1237,6 +1239,43 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
524 return res;
527 +__le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group,
528 + struct ext4_group_desc *gdp)
530 + __u16 crc = 0;
532 + if (sbi->s_es->s_feature_ro_compat &
533 + cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) {
534 + int offset = offsetof(struct ext4_group_desc, bg_checksum);
535 + __le32 le_group = cpu_to_le32(block_group);
537 + crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid));
538 + crc = crc16(crc, (__u8 *)&le_group, sizeof(le_group));
539 + crc = crc16(crc, (__u8 *)gdp, offset);
540 + offset += sizeof(gdp->bg_checksum); /* skip checksum */
541 + /* for checksum of struct ext4_group_desc do the rest...*/
542 + if ((sbi->s_es->s_feature_incompat &
543 + cpu_to_le32(EXT4_FEATURE_INCOMPAT_64BIT)) &&
544 + offset < le16_to_cpu(sbi->s_es->s_desc_size))
545 + crc = crc16(crc, (__u8 *)gdp + offset,
546 + le16_to_cpu(sbi->s_es->s_desc_size) -
547 + offset);
550 + return cpu_to_le16(crc);
553 +int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 block_group,
554 + struct ext4_group_desc *gdp)
556 + if ((sbi->s_es->s_feature_ro_compat &
557 + cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) &&
558 + (gdp->bg_checksum != ext4_group_desc_csum(sbi, block_group, gdp)))
559 + return 0;
561 + return 1;
564 /* Called at mount-time, super-block is locked */
565 static int ext4_check_descriptors (struct super_block * sb)
567 @@ -1291,6 +1330,14 @@ static int ext4_check_descriptors (struct super_block * sb)
568 i, inode_table);
569 return 0;
571 + if (!ext4_group_desc_csum_verify(sbi, i, gdp)) {
572 + ext4_error(sb, __FUNCTION__,
573 + "Checksum for group %d failed (%u!=%u)\n", i,
574 + le16_to_cpu(ext4_group_desc_csum(sbi, i,
575 + gdp)),
576 + le16_to_cpu(gdp->bg_checksum));
577 + return 0;
579 first_block += EXT4_BLOCKS_PER_GROUP(sb);
580 gdp = (struct ext4_group_desc *)
581 ((__u8 *)gdp + EXT4_DESC_SIZE(sb));
582 diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h
583 index abd70d9..60ed361 100644
584 --- a/include/linux/ext4_fs.h
585 +++ b/include/linux/ext4_fs.h
586 @@ -123,19 +123,25 @@
588 struct ext4_group_desc
590 - __le32 bg_block_bitmap; /* Blocks bitmap block */
591 - __le32 bg_inode_bitmap; /* Inodes bitmap block */
592 + __le32 bg_block_bitmap; /* Blocks bitmap block */
593 + __le32 bg_inode_bitmap; /* Inodes bitmap block */
594 __le32 bg_inode_table; /* Inodes table block */
595 __le16 bg_free_blocks_count; /* Free blocks count */
596 __le16 bg_free_inodes_count; /* Free inodes count */
597 __le16 bg_used_dirs_count; /* Directories count */
598 - __u16 bg_flags;
599 - __u32 bg_reserved[3];
600 + __le16 bg_flags; /* EXT4_BG_flags (INODE_UNINIT, etc) */
601 + __u32 bg_reserved[2]; /* Likely block/inode bitmap checksum */
602 + __le16 bg_itable_unused; /* Unused inodes count */
603 + __le16 bg_checksum; /* crc16(sb_uuid+group+desc) */
604 __le32 bg_block_bitmap_hi; /* Blocks bitmap block MSB */
605 __le32 bg_inode_bitmap_hi; /* Inodes bitmap block MSB */
606 __le32 bg_inode_table_hi; /* Inodes table block MSB */
609 +#define EXT4_BG_INODE_UNINIT 0x0001 /* Inode table/bitmap not in use */
610 +#define EXT4_BG_BLOCK_UNINIT 0x0002 /* Block bitmap not in use */
611 +#define EXT4_BG_INODE_ZEROED 0x0004 /* On-disk itable initialized to zero */
613 #ifdef __KERNEL__
614 #include <linux/ext4_fs_i.h>
615 #include <linux/ext4_fs_sb.h>
616 @@ -692,6 +698,7 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
617 #define EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER 0x0001
618 #define EXT4_FEATURE_RO_COMPAT_LARGE_FILE 0x0002
619 #define EXT4_FEATURE_RO_COMPAT_BTREE_DIR 0x0004
620 +#define EXT4_FEATURE_RO_COMPAT_GDT_CSUM 0x0010
621 #define EXT4_FEATURE_RO_COMPAT_DIR_NLINK 0x0020
622 #define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE 0x0040
624 @@ -711,6 +718,7 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
625 EXT4_FEATURE_INCOMPAT_64BIT)
626 #define EXT4_FEATURE_RO_COMPAT_SUPP (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \
627 EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \
628 + EXT4_FEATURE_RO_COMPAT_GDT_CSUM| \
629 EXT4_FEATURE_RO_COMPAT_DIR_NLINK | \
630 EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE | \
631 EXT4_FEATURE_RO_COMPAT_BTREE_DIR)