More patch description fixups. Standardize case.
[ext4-patch-queue.git] / jbd2-journal-chksum.patch
blob40713160f68c39efe537ff57a3cacaee4417675d
1 ext4: Add the journal checksum feature
3 From: Girish Shilamkar <girish@clusterfs.com>
5 The journal checksum feature adds two new flags, i.e.
6 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT and JBD2_FEATURE_COMPAT_CHECKSUM.
8 The JBD2_FEATURE_COMPAT_CHECKSUM flag indicates that the commit block contains the
9 checksum for the blocks described by the descriptor blocks.
10 Due to checksums, writing of the commit record no longer needs to be
11 synchronous. The commit record can now be sent to disk without waiting for
12 the descriptor blocks to be written to disk. This behavior is controlled
13 by the JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT flag. Older kernels/e2fsck should not be
14 able to recover a journal with _ASYNC_COMMIT set, hence the flag is made
15 incompat.
16 The commit header has been extended to hold the checksum along with the
17 type of the checksum.
19 During recovery, checksums are verified in PASS_SCAN to ensure the sanity
20 and completeness (in the case of _ASYNC_COMMIT) of every transaction.
22 Signed-off-by: Andreas Dilger <adilger@clusterfs.com>
23 Signed-off-by: Girish Shilamkar <girish@clusterfs.com>
24 Signed-off-by: Dave Kleikamp <shaggy@linux.vnet.ibm.com>
25 Signed-off-by: Mingming Cao <cmm@us.ibm.com>
26 ---
28 Documentation/filesystems/ext4.txt | 10 +
29 fs/Kconfig | 1
30 fs/ext4/super.c | 25 ++++
31 fs/jbd2/commit.c | 196 +++++++++++++++++++++++++++----------
32 fs/jbd2/journal.c | 28 +++++
33 fs/jbd2/recovery.c | 149 ++++++++++++++++++++++++++--
34 include/linux/ext4_fs.h | 3
35 include/linux/jbd2.h | 36 ++++++
36 8 files changed, 388 insertions(+), 60 deletions(-)
39 Index: linux-2.6.24-rc7/Documentation/filesystems/ext4.txt
40 ===================================================================
41 --- linux-2.6.24-rc7.orig/Documentation/filesystems/ext4.txt 2008-01-16 13:47:29.000000000 -0800
42 +++ linux-2.6.24-rc7/Documentation/filesystems/ext4.txt 2008-01-16 13:50:14.000000000 -0800
43 @@ -89,6 +89,16 @@ When mounting an ext4 filesystem, the fo
44 extents ext4 will use extents to address file data. The
45 file system will no longer be mountable by ext3.
47 +journal_checksum Enable checksumming of the journal transactions.
48 + This will allow the recovery code in e2fsck and the
49 + kernel to detect corruption in the journal. It is a
50 + compatible change and will be ignored by older kernels.
52 +journal_async_commit Commit block can be written to disk without waiting
53 + for descriptor blocks. If enabled, older kernels cannot
54 + mount the device. This will enable 'journal_checksum'
55 + internally.
57 journal=update Update the ext4 file system's journal to the current
58 format.
60 Index: linux-2.6.24-rc7/fs/Kconfig
61 ===================================================================
62 --- linux-2.6.24-rc7.orig/fs/Kconfig 2008-01-16 13:47:29.000000000 -0800
63 +++ linux-2.6.24-rc7/fs/Kconfig 2008-01-16 13:50:14.000000000 -0800
64 @@ -236,6 +236,7 @@ config JBD_DEBUG
66 config JBD2
67 tristate
68 + select CRC32
69 help
70 This is a generic journaling layer for block devices that support
71 both 32-bit and 64-bit block numbers. It is currently used by
72 Index: linux-2.6.24-rc7/fs/ext4/super.c
73 ===================================================================
74 --- linux-2.6.24-rc7.orig/fs/ext4/super.c 2008-01-16 13:50:09.000000000 -0800
75 +++ linux-2.6.24-rc7/fs/ext4/super.c 2008-01-16 13:50:14.000000000 -0800
76 @@ -869,6 +869,7 @@ enum {
77 Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl,
78 Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, Opt_bh,
79 Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev,
80 + Opt_journal_checksum, Opt_journal_async_commit,
81 Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
82 Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
83 Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota,
84 @@ -908,6 +909,8 @@ static match_table_t tokens = {
85 {Opt_journal_update, "journal=update"},
86 {Opt_journal_inum, "journal=%u"},
87 {Opt_journal_dev, "journal_dev=%u"},
88 + {Opt_journal_checksum, "journal_checksum"},
89 + {Opt_journal_async_commit, "journal_async_commit"},
90 {Opt_abort, "abort"},
91 {Opt_data_journal, "data=journal"},
92 {Opt_data_ordered, "data=ordered"},
93 @@ -1095,6 +1098,13 @@ static int parse_options (char *options,
94 return 0;
95 *journal_devnum = option;
96 break;
97 + case Opt_journal_checksum:
98 + set_opt(sbi->s_mount_opt, JOURNAL_CHECKSUM);
99 + break;
100 + case Opt_journal_async_commit:
101 + set_opt(sbi->s_mount_opt, JOURNAL_ASYNC_COMMIT);
102 + set_opt(sbi->s_mount_opt, JOURNAL_CHECKSUM);
103 + break;
104 case Opt_noload:
105 set_opt (sbi->s_mount_opt, NOLOAD);
106 break;
107 @@ -2114,6 +2124,21 @@ static int ext4_fill_super (struct super
108 goto failed_mount4;
111 + if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
112 + jbd2_journal_set_features(sbi->s_journal,
113 + JBD2_FEATURE_COMPAT_CHECKSUM, 0,
114 + JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
115 + } else if (test_opt(sb, JOURNAL_CHECKSUM)) {
116 + jbd2_journal_set_features(sbi->s_journal,
117 + JBD2_FEATURE_COMPAT_CHECKSUM, 0, 0);
118 + jbd2_journal_clear_features(sbi->s_journal, 0, 0,
119 + JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
120 + } else {
121 + jbd2_journal_clear_features(sbi->s_journal,
122 + JBD2_FEATURE_COMPAT_CHECKSUM, 0,
123 + JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
126 /* We have now updated the journal if required, so we can
127 * validate the data journaling mode. */
128 switch (test_opt(sb, DATA_FLAGS)) {
129 Index: linux-2.6.24-rc7/fs/jbd2/commit.c
130 ===================================================================
131 --- linux-2.6.24-rc7.orig/fs/jbd2/commit.c 2008-01-16 13:50:13.000000000 -0800
132 +++ linux-2.6.24-rc7/fs/jbd2/commit.c 2008-01-16 13:50:14.000000000 -0800
133 @@ -21,6 +21,7 @@
134 #include <linux/mm.h>
135 #include <linux/pagemap.h>
136 #include <linux/jiffies.h>
137 +#include <linux/crc32.h>
140 * Default IO end handler for temporary BJ_IO buffer_heads.
141 @@ -93,19 +94,23 @@ static int inverted_lock(journal_t *jour
142 return 1;
145 -/* Done it all: now write the commit record. We should have
147 + * Done it all: now submit the commit record. We should have
148 * cleaned up our previous buffers by now, so if we are in abort
149 * mode we can now just skip the rest of the journal write
150 * entirely.
152 * Returns 1 if the journal needs to be aborted or 0 on success
154 -static int journal_write_commit_record(journal_t *journal,
155 - transaction_t *commit_transaction)
156 +static int journal_submit_commit_record(journal_t *journal,
157 + transaction_t *commit_transaction,
158 + struct buffer_head **cbh,
159 + __u32 crc32_sum)
161 struct journal_head *descriptor;
162 + struct commit_header *tmp;
163 struct buffer_head *bh;
164 - int i, ret;
165 + int ret;
166 int barrier_done = 0;
168 if (is_journal_aborted(journal))
169 @@ -117,21 +122,33 @@ static int journal_write_commit_record(j
171 bh = jh2bh(descriptor);
173 - /* AKPM: buglet - add `i' to tmp! */
174 - for (i = 0; i < bh->b_size; i += 512) {
175 - journal_header_t *tmp = (journal_header_t*)bh->b_data;
176 - tmp->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER);
177 - tmp->h_blocktype = cpu_to_be32(JBD2_COMMIT_BLOCK);
178 - tmp->h_sequence = cpu_to_be32(commit_transaction->t_tid);
179 + tmp = (struct commit_header *)bh->b_data;
180 + tmp->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER);
181 + tmp->h_blocktype = cpu_to_be32(JBD2_COMMIT_BLOCK);
182 + tmp->h_sequence = cpu_to_be32(commit_transaction->t_tid);
184 + if (JBD2_HAS_COMPAT_FEATURE(journal,
185 + JBD2_FEATURE_COMPAT_CHECKSUM)) {
186 + tmp->h_chksum_type = JBD2_CRC32_CHKSUM;
187 + tmp->h_chksum_size = JBD2_CRC32_CHKSUM_SIZE;
188 + tmp->h_chksum[0] = cpu_to_be32(crc32_sum);
191 - JBUFFER_TRACE(descriptor, "write commit block");
192 + JBUFFER_TRACE(descriptor, "submit commit block");
193 + lock_buffer(bh);
195 set_buffer_dirty(bh);
196 - if (journal->j_flags & JBD2_BARRIER) {
197 + set_buffer_uptodate(bh);
198 + bh->b_end_io = journal_end_buffer_io_sync;
200 + if (journal->j_flags & JBD2_BARRIER &&
201 + !JBD2_HAS_COMPAT_FEATURE(journal,
202 + JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) {
203 set_buffer_ordered(bh);
204 barrier_done = 1;
206 - ret = sync_dirty_buffer(bh);
207 + ret = submit_bh(WRITE, bh);
209 /* is it possible for another commit to fail at roughly
210 * the same time as this one? If so, we don't want to
211 * trust the barrier flag in the super, but instead want
212 @@ -152,14 +169,72 @@ static int journal_write_commit_record(j
213 clear_buffer_ordered(bh);
214 set_buffer_uptodate(bh);
215 set_buffer_dirty(bh);
216 - ret = sync_dirty_buffer(bh);
217 + ret = submit_bh(WRITE, bh);
219 - put_bh(bh); /* One for getblk() */
220 - jbd2_journal_put_journal_head(descriptor);
221 + *cbh = bh;
222 + return ret;
226 + * This function along with journal_submit_commit_record
227 + * allows the commit record to be written asynchronously.
228 + */
229 +static int journal_wait_on_commit_record(struct buffer_head *bh)
231 + int ret = 0;
233 + clear_buffer_dirty(bh);
234 + wait_on_buffer(bh);
236 + if (unlikely(!buffer_uptodate(bh)))
237 + ret = -EIO;
238 + put_bh(bh); /* One for getblk() */
239 + jbd2_journal_put_journal_head(bh2jh(bh));
241 - return (ret == -EIO);
242 + return ret;
246 + * Wait for all submitted IO to complete.
247 + */
248 +static int journal_wait_on_locked_list(journal_t *journal,
249 + transaction_t *commit_transaction)
251 + int ret = 0;
252 + struct journal_head *jh;
254 + while (commit_transaction->t_locked_list) {
255 + struct buffer_head *bh;
257 + jh = commit_transaction->t_locked_list->b_tprev;
258 + bh = jh2bh(jh);
259 + get_bh(bh);
260 + if (buffer_locked(bh)) {
261 + spin_unlock(&journal->j_list_lock);
262 + wait_on_buffer(bh);
263 + if (unlikely(!buffer_uptodate(bh)))
264 + ret = -EIO;
265 + spin_lock(&journal->j_list_lock);
267 + if (!inverted_lock(journal, bh)) {
268 + put_bh(bh);
269 + spin_lock(&journal->j_list_lock);
270 + continue;
272 + if (buffer_jbd(bh) && jh->b_jlist == BJ_Locked) {
273 + __jbd2_journal_unfile_buffer(jh);
274 + jbd_unlock_bh_state(bh);
275 + jbd2_journal_remove_journal_head(bh);
276 + put_bh(bh);
277 + } else {
278 + jbd_unlock_bh_state(bh);
280 + put_bh(bh);
281 + cond_resched_lock(&journal->j_list_lock);
283 + return ret;
286 static void journal_do_submit_data(struct buffer_head **wbuf, int bufs)
288 int i;
289 @@ -275,6 +350,20 @@ write_out_data:
290 journal_do_submit_data(wbuf, bufs);
293 +static inline __u32 jbd2_checksum_data(__u32 crc32_sum, struct buffer_head *bh)
295 + struct page *page = bh->b_page;
296 + char *addr;
297 + __u32 checksum;
299 + addr = kmap_atomic(page, KM_USER0);
300 + checksum = crc32_be(crc32_sum,
301 + (void *)(addr + offset_in_page(bh->b_data)), bh->b_size);
302 + kunmap_atomic(addr, KM_USER0);
304 + return checksum;
307 static inline void write_tag_block(int tag_bytes, journal_block_tag_t *tag,
308 unsigned long long block)
310 @@ -307,6 +396,8 @@ void jbd2_journal_commit_transaction(jou
311 int tag_flag;
312 int i;
313 int tag_bytes = journal_tag_bytes(journal);
314 + struct buffer_head *cbh = NULL; /* For transactional checksums */
315 + __u32 crc32_sum = ~0;
318 * First job: lock down the current transaction and wait for
319 @@ -451,38 +542,15 @@ void jbd2_journal_commit_transaction(jou
320 journal_submit_data_buffers(journal, commit_transaction);
323 - * Wait for all previously submitted IO to complete.
324 + * Wait for all previously submitted IO to complete if commit
325 + * record is to be written synchronously.
327 spin_lock(&journal->j_list_lock);
328 - while (commit_transaction->t_locked_list) {
329 - struct buffer_head *bh;
330 + if (!JBD2_HAS_INCOMPAT_FEATURE(journal,
331 + JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT))
332 + err = journal_wait_on_locked_list(journal,
333 + commit_transaction);
335 - jh = commit_transaction->t_locked_list->b_tprev;
336 - bh = jh2bh(jh);
337 - get_bh(bh);
338 - if (buffer_locked(bh)) {
339 - spin_unlock(&journal->j_list_lock);
340 - wait_on_buffer(bh);
341 - if (unlikely(!buffer_uptodate(bh)))
342 - err = -EIO;
343 - spin_lock(&journal->j_list_lock);
345 - if (!inverted_lock(journal, bh)) {
346 - put_bh(bh);
347 - spin_lock(&journal->j_list_lock);
348 - continue;
350 - if (buffer_jbd(bh) && jh->b_jlist == BJ_Locked) {
351 - __jbd2_journal_unfile_buffer(jh);
352 - jbd_unlock_bh_state(bh);
353 - jbd2_journal_remove_journal_head(bh);
354 - put_bh(bh);
355 - } else {
356 - jbd_unlock_bh_state(bh);
358 - put_bh(bh);
359 - cond_resched_lock(&journal->j_list_lock);
361 spin_unlock(&journal->j_list_lock);
363 if (err)
364 @@ -656,6 +724,15 @@ void jbd2_journal_commit_transaction(jou
365 start_journal_io:
366 for (i = 0; i < bufs; i++) {
367 struct buffer_head *bh = wbuf[i];
368 + /*
369 + * Compute checksum.
370 + */
371 + if (JBD2_HAS_COMPAT_FEATURE(journal,
372 + JBD2_FEATURE_COMPAT_CHECKSUM)) {
373 + crc32_sum =
374 + jbd2_checksum_data(crc32_sum, bh);
377 lock_buffer(bh);
378 clear_buffer_dirty(bh);
379 set_buffer_uptodate(bh);
380 @@ -672,6 +749,23 @@ start_journal_io:
384 + /* Done it all: now write the commit record asynchronously. */
386 + if (JBD2_HAS_INCOMPAT_FEATURE(journal,
387 + JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) {
388 + err = journal_submit_commit_record(journal, commit_transaction,
389 + &cbh, crc32_sum);
390 + if (err)
391 + __jbd2_journal_abort_hard(journal);
393 + spin_lock(&journal->j_list_lock);
394 + err = journal_wait_on_locked_list(journal,
395 + commit_transaction);
396 + spin_unlock(&journal->j_list_lock);
397 + if (err)
398 + __jbd2_journal_abort_hard(journal);
401 /* Lo and behold: we have just managed to send a transaction to
402 the log. Before we can commit it, wait for the IO so far to
403 complete. Control buffers being written are on the
404 @@ -771,8 +865,14 @@ wait_for_iobuf:
406 jbd_debug(3, "JBD: commit phase 6\n");
408 - if (journal_write_commit_record(journal, commit_transaction))
409 - err = -EIO;
410 + if (!JBD2_HAS_INCOMPAT_FEATURE(journal,
411 + JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) {
412 + err = journal_submit_commit_record(journal, commit_transaction,
413 + &cbh, crc32_sum);
414 + if (err)
415 + __jbd2_journal_abort_hard(journal);
417 + err = journal_wait_on_commit_record(cbh);
419 if (err)
420 jbd2_journal_abort(journal, err);
421 Index: linux-2.6.24-rc7/fs/jbd2/journal.c
422 ===================================================================
423 --- linux-2.6.24-rc7.orig/fs/jbd2/journal.c 2008-01-16 13:50:13.000000000 -0800
424 +++ linux-2.6.24-rc7/fs/jbd2/journal.c 2008-01-16 13:50:14.000000000 -0800
425 @@ -1578,6 +1578,34 @@ int jbd2_journal_set_features (journal_t
426 return 1;
430 + * jbd2_journal_clear_features () - Clear a given journal feature in the
431 + * superblock
432 + * @journal: Journal to act on.
433 + * @compat: bitmask of compatible features
434 + * @ro: bitmask of features that force read-only mount
435 + * @incompat: bitmask of incompatible features
437 + * Clear the given journal features in the
438 + * superblock. Always returns true (1); clearing cannot fail.
439 + */
440 +int jbd2_journal_clear_features(journal_t *journal, unsigned long compat,
441 + unsigned long ro, unsigned long incompat)
443 + journal_superblock_t *sb;
445 + jbd_debug(1, "Clear features 0x%lx/0x%lx/0x%lx\n",
446 + compat, ro, incompat);
448 + sb = journal->j_superblock;
450 + sb->s_feature_compat &= ~cpu_to_be32(compat);
451 + sb->s_feature_ro_compat &= ~cpu_to_be32(ro);
452 + sb->s_feature_incompat &= ~cpu_to_be32(incompat);
454 + return 1;
456 +EXPORT_SYMBOL(jbd2_journal_clear_features);
459 * int jbd2_journal_update_format () - Update on-disk journal structure.
460 Index: linux-2.6.24-rc7/fs/jbd2/recovery.c
461 ===================================================================
462 --- linux-2.6.24-rc7.orig/fs/jbd2/recovery.c 2008-01-16 13:47:29.000000000 -0800
463 +++ linux-2.6.24-rc7/fs/jbd2/recovery.c 2008-01-16 13:52:00.000000000 -0800
464 @@ -21,6 +21,7 @@
465 #include <linux/jbd2.h>
466 #include <linux/errno.h>
467 #include <linux/slab.h>
468 +#include <linux/crc32.h>
469 #endif
472 @@ -316,6 +317,37 @@ static inline unsigned long long read_ta
473 return block;
477 + * calc_chksums calculates the checksums for the blocks described in the
478 + * descriptor block.
479 + */
480 +static int calc_chksums(journal_t *journal, struct buffer_head *bh,
481 + unsigned long *next_log_block, __u32 *crc32_sum)
483 + int i, num_blks, err;
484 + unsigned io_block;
485 + struct buffer_head *obh;
487 + num_blks = count_tags(journal, bh);
488 + /* Calculate checksum of the descriptor block. */
489 + *crc32_sum = crc32_be(*crc32_sum, (void *)bh->b_data, bh->b_size);
491 + for (i = 0; i < num_blks; i++) {
492 + io_block = (*next_log_block)++;
493 + wrap(journal, *next_log_block);
494 + err = jread(&obh, journal, io_block);
495 + if (err) {
496 + printk(KERN_ERR "JBD: IO error %d recovering block "
497 + "%u in log\n", err, io_block);
498 + return 1;
499 + } else {
500 + *crc32_sum = crc32_be(*crc32_sum, (void *)obh->b_data,
501 + obh->b_size);
504 + return 0;
507 static int do_one_pass(journal_t *journal,
508 struct recovery_info *info, enum passtype pass)
510 @@ -328,6 +360,7 @@ static int do_one_pass(journal_t *journa
511 unsigned int sequence;
512 int blocktype;
513 int tag_bytes = journal_tag_bytes(journal);
514 + __u32 crc32_sum = ~0; /* Transactional Checksums */
516 /* Precompute the maximum metadata descriptors in a descriptor block */
517 int MAX_BLOCKS_PER_DESC;
518 @@ -419,9 +452,23 @@ static int do_one_pass(journal_t *journa
519 switch(blocktype) {
520 case JBD2_DESCRIPTOR_BLOCK:
521 /* If it is a valid descriptor block, replay it
522 - * in pass REPLAY; otherwise, just skip over the
523 - * blocks it describes. */
524 + * in pass REPLAY; if journal_checksums enabled, then
525 + * calculate checksums in PASS_SCAN, otherwise,
526 + * just skip over the blocks it describes. */
527 if (pass != PASS_REPLAY) {
528 + if (pass == PASS_SCAN &&
529 + JBD2_HAS_COMPAT_FEATURE(journal,
530 + JBD2_FEATURE_COMPAT_CHECKSUM) &&
531 + !info->end_transaction) {
532 + if (calc_chksums(journal, bh,
533 + &next_log_block,
534 + &crc32_sum)) {
535 + brelse(bh);
536 + break;
538 + brelse(bh);
539 + continue;
541 next_log_block += count_tags(journal, bh);
542 wrap(journal, next_log_block);
543 brelse(bh);
544 @@ -516,9 +563,96 @@ static int do_one_pass(journal_t *journa
545 continue;
547 case JBD2_COMMIT_BLOCK:
548 - /* Found an expected commit block: not much to
549 - * do other than move on to the next sequence
550 + /* How to differentiate between interrupted commit
551 + * and journal corruption ?
553 + * {nth transaction}
554 + * Checksum Verification Failed
555 + * |
556 + * ____________________
557 + * | |
558 + * async_commit sync_commit
559 + * | |
560 + * | GO TO NEXT "Journal Corruption"
561 + * | TRANSACTION
562 + * |
563 + * {(n+1)th transaction}
564 + * |
565 + * _______|______________
566 + * | |
567 + * Commit block found Commit block not found
568 + * | |
569 + * "Journal Corruption" |
570 + * _____________|_________
571 + * | |
572 + * nth trans corrupt OR nth trans
573 + * and (n+1)th interrupted interrupted
574 + * before commit block
575 + * could reach the disk.
576 + * (Cannot find the difference in above
577 + * mentioned conditions. Hence assume
578 + * "Interrupted Commit".)
579 + */
581 + /* Found an expected commit block: if checksums
582 + * are present verify them in PASS_SCAN; else not
583 + * much to do other than move on to the next sequence
584 * number. */
585 + if (pass == PASS_SCAN &&
586 + JBD2_HAS_COMPAT_FEATURE(journal,
587 + JBD2_FEATURE_COMPAT_CHECKSUM)) {
588 + int chksum_err, chksum_seen;
589 + struct commit_header *cbh =
590 + (struct commit_header *)bh->b_data;
591 + unsigned found_chksum =
592 + be32_to_cpu(cbh->h_chksum[0]);
594 + chksum_err = chksum_seen = 0;
596 + if (info->end_transaction) {
597 + printk(KERN_ERR "JBD: Transaction %u "
598 + "found to be corrupt.\n",
599 + next_commit_ID - 1);
600 + brelse(bh);
601 + break;
604 + if (crc32_sum == found_chksum &&
605 + cbh->h_chksum_type == JBD2_CRC32_CHKSUM &&
606 + cbh->h_chksum_size ==
607 + JBD2_CRC32_CHKSUM_SIZE)
608 + chksum_seen = 1;
609 + else if (!(cbh->h_chksum_type == 0 &&
610 + cbh->h_chksum_size == 0 &&
611 + found_chksum == 0 &&
612 + !chksum_seen))
613 + /*
614 + * If fs is mounted using an old kernel and then
615 + * kernel with journal_chksum is used then we
616 + * get a situation where the journal flag has
617 + * checksum flag set but checksums are not
618 + * present i.e chksum = 0, in the individual
619 + * commit blocks.
620 + * Hence to avoid checksum failures, in this
621 + * situation, this extra check is added.
622 + */
623 + chksum_err = 1;
625 + if (chksum_err) {
626 + info->end_transaction = next_commit_ID;
628 + if (!JBD2_HAS_COMPAT_FEATURE(journal,
629 + JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)){
630 + printk(KERN_ERR
631 + "JBD: Transaction %u "
632 + "found to be corrupt.\n",
633 + next_commit_ID);
634 + brelse(bh);
635 + break;
638 + crc32_sum = ~0;
640 brelse(bh);
641 next_commit_ID++;
642 continue;
643 @@ -554,9 +688,10 @@ static int do_one_pass(journal_t *journa
644 * transaction marks the end of the valid log.
647 - if (pass == PASS_SCAN)
648 - info->end_transaction = next_commit_ID;
649 - else {
650 + if (pass == PASS_SCAN) {
651 + if (!info->end_transaction)
652 + info->end_transaction = next_commit_ID;
653 + } else {
654 /* It's really bad news if different passes end up at
655 * different places (but possible due to IO errors). */
656 if (info->end_transaction != next_commit_ID) {
657 Index: linux-2.6.24-rc7/include/linux/ext4_fs.h
658 ===================================================================
659 --- linux-2.6.24-rc7.orig/include/linux/ext4_fs.h 2008-01-16 13:50:09.000000000 -0800
660 +++ linux-2.6.24-rc7/include/linux/ext4_fs.h 2008-01-16 13:50:14.000000000 -0800
661 @@ -467,7 +467,8 @@ do { \
662 #define EXT4_MOUNT_USRQUOTA 0x100000 /* "old" user quota */
663 #define EXT4_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */
664 #define EXT4_MOUNT_EXTENTS 0x400000 /* Extents support */
666 +#define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */
667 +#define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */
668 /* Compatibility, for having both ext2_fs.h and ext4_fs.h included at once */
669 #ifndef _LINUX_EXT2_FS_H
670 #define clear_opt(o, opt) o &= ~EXT4_MOUNT_##opt
671 Index: linux-2.6.24-rc7/include/linux/jbd2.h
672 ===================================================================
673 --- linux-2.6.24-rc7.orig/include/linux/jbd2.h 2008-01-16 13:50:13.000000000 -0800
674 +++ linux-2.6.24-rc7/include/linux/jbd2.h 2008-01-16 13:50:14.000000000 -0800
675 @@ -149,6 +149,28 @@ typedef struct journal_header_s
676 __be32 h_sequence;
677 } journal_header_t;
680 + * Checksum types.
681 + */
682 +#define JBD2_CRC32_CHKSUM 1
683 +#define JBD2_MD5_CHKSUM 2
684 +#define JBD2_SHA1_CHKSUM 3
686 +#define JBD2_CRC32_CHKSUM_SIZE 4
688 +#define JBD2_CHECKSUM_BYTES (32 / sizeof(u32))
690 + * Commit block header for storing transactional checksums:
691 + */
692 +struct commit_header {
693 + __be32 h_magic;
694 + __be32 h_blocktype;
695 + __be32 h_sequence;
696 + unsigned char h_chksum_type;
697 + unsigned char h_chksum_size;
698 + unsigned char h_padding[2];
699 + __be32 h_chksum[JBD2_CHECKSUM_BYTES];
703 * The block tag: used to describe a single buffer in the journal.
704 @@ -242,14 +264,18 @@ typedef struct journal_superblock_s
705 ((j)->j_format_version >= 2 && \
706 ((j)->j_superblock->s_feature_incompat & cpu_to_be32((mask))))
708 -#define JBD2_FEATURE_INCOMPAT_REVOKE 0x00000001
709 -#define JBD2_FEATURE_INCOMPAT_64BIT 0x00000002
710 +#define JBD2_FEATURE_COMPAT_CHECKSUM 0x00000001
712 +#define JBD2_FEATURE_INCOMPAT_REVOKE 0x00000001
713 +#define JBD2_FEATURE_INCOMPAT_64BIT 0x00000002
714 +#define JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT 0x00000004
716 /* Features known to this kernel version: */
717 -#define JBD2_KNOWN_COMPAT_FEATURES 0
718 +#define JBD2_KNOWN_COMPAT_FEATURES JBD2_FEATURE_COMPAT_CHECKSUM
719 #define JBD2_KNOWN_ROCOMPAT_FEATURES 0
720 #define JBD2_KNOWN_INCOMPAT_FEATURES (JBD2_FEATURE_INCOMPAT_REVOKE | \
721 - JBD2_FEATURE_INCOMPAT_64BIT)
722 + JBD2_FEATURE_INCOMPAT_64BIT | \
723 + JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)
725 #ifdef __KERNEL__
727 @@ -997,6 +1023,8 @@ extern int jbd2_journal_check_availab
728 (journal_t *, unsigned long, unsigned long, unsigned long);
729 extern int jbd2_journal_set_features
730 (journal_t *, unsigned long, unsigned long, unsigned long);
731 +extern int jbd2_journal_clear_features
732 + (journal_t *, unsigned long, unsigned long, unsigned long);
733 extern int jbd2_journal_create (journal_t *);
734 extern int jbd2_journal_load (journal_t *journal);
735 extern void jbd2_journal_destroy (journal_t *);