add patch avoid-long-hold-times
[ext4-patch-queue.git] / load-jmap-from-journal
blob609bf478c115be57e6782ac9cf3db03fdf42bc0f
1 jbd2: load jmap from journal
3 If the lazy journal feature is enabled, instead of replaying the
4 journal, read the journal into the journal map.
6 Signed-off-by: Theodore Ts'o <tytso@mit.edu>
7 ---
8  fs/jbd2/journal.c  |  27 +++++++++---------------
9  fs/jbd2/recovery.c | 105 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----------------------
10  2 files changed, 90 insertions(+), 42 deletions(-)
12 diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
13 index 9c097ddfe63f..8060ab4805eb 100644
14 --- a/fs/jbd2/journal.c
15 +++ b/fs/jbd2/journal.c
16 @@ -1276,31 +1276,24 @@ static void journal_fail_superblock (journal_t *journal)
18  /*
19   * Given a journal_t structure, initialise the various fields for
20 - * startup of a new journaling session.  We use this both when creating
21 - * a journal, and after recovering an old journal to reset it for
22 - * subsequent use.
23 + * startup of a new journaling session.
24   */
26  static int journal_reset(journal_t *journal)
27  {
28         journal_superblock_t *sb = journal->j_superblock;
29 -       unsigned long long first, last;
30 +       int free;
32 -       first = be32_to_cpu(sb->s_first);
33 -       last = be32_to_cpu(sb->s_maxlen);
34 -       if (first + JBD2_MIN_JOURNAL_BLOCKS > last + 1) {
35 -               printk(KERN_ERR "JBD2: Journal too short (blocks %llu-%llu).\n",
36 -                      first, last);
37 +       if (journal->j_first + JBD2_MIN_JOURNAL_BLOCKS > journal->j_last + 1) {
38 +               printk(KERN_ERR "JBD2: Journal too short (blocks %lu-%lu).\n",
39 +                      journal->j_first, journal->j_last);
40                 journal_fail_superblock(journal);
41                 return -EINVAL;
42         }
44 -       journal->j_first = first;
45 -       journal->j_last = last;
47 -       journal->j_head = first;
48 -       journal->j_tail = first;
49 -       journal->j_free = last - first;
50 +       free = journal->j_tail - journal->j_head;
51 +       if (free <= 0)
52 +               free += journal->j_last - journal->j_first;
53 +       journal->j_free = free;
55         journal->j_tail_sequence = journal->j_transaction_sequence;
56         journal->j_commit_sequence = journal->j_transaction_sequence - 1;
57 @@ -1320,7 +1313,7 @@ static int journal_reset(journal_t *journal)
58                         journal->j_tail, journal->j_tail_sequence,
59                         journal->j_errno);
60                 journal->j_flags |= JBD2_FLUSHED;
61 -       } else {
62 +       } else if ((journal->j_flags & JBD2_LAZY) == 0) {
63                 /* Lock here to make assertions happy... */
64                 mutex_lock_io(&journal->j_checkpoint_mutex);
65                 /*
66 diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c
67 index da100044566c..7a74ea1860a9 100644
68 --- a/fs/jbd2/recovery.c
69 +++ b/fs/jbd2/recovery.c
70 @@ -22,6 +22,7 @@
71  #include <linux/errno.h>
72  #include <linux/crc32.h>
73  #include <linux/blkdev.h>
74 +#include "jmap.h"
75  #endif
77  /*
78 @@ -32,17 +33,18 @@ struct recovery_info
79  {
80         tid_t           start_transaction;
81         tid_t           end_transaction;
82 +       int             head_block;
84         int             nr_replays;
85         int             nr_revokes;
86         int             nr_revoke_hits;
87  };
89 -enum passtype {PASS_SCAN, PASS_REVOKE, PASS_REPLAY};
90 +enum passtype {PASS_SCAN, PASS_REVOKE, PASS_REPLAY, PASS_JMAP};
91  static int do_one_pass(journal_t *journal,
92                                 struct recovery_info *info, enum passtype pass);
93 -static int scan_revoke_records(journal_t *, struct buffer_head *,
94 -                               tid_t, struct recovery_info *);
95 +static int scan_revoke_records(journal_t *, struct buffer_head *, enum passtype,
96 +                              tid_t, struct recovery_info *);
98  #ifdef __KERNEL__
100 @@ -255,11 +257,16 @@ int jbd2_journal_recover(journal_t *journal)
101         sb = journal->j_superblock;
103         /*
104 +        * Initialize journal's head and tail assuming the recovery
105 +        * was successful and we're not doing lazy journalling.
106 +        */
107 +       journal->j_head = journal->j_tail = journal->j_first;
109 +       /*
110          * The journal superblock's s_start field (the current log head)
111          * is always zero if, and only if, the journal was cleanly
112          * unmounted.
113          */
115         if (!sb->s_start) {
116                 jbd_debug(1, "No recovery required, last transaction %d\n",
117                           be32_to_cpu(sb->s_sequence));
118 @@ -267,11 +274,15 @@ int jbd2_journal_recover(journal_t *journal)
119                 return 0;
120         }
122 -       err = do_one_pass(journal, &info, PASS_SCAN);
123 -       if (!err)
124 -               err = do_one_pass(journal, &info, PASS_REVOKE);
125 -       if (!err)
126 -               err = do_one_pass(journal, &info, PASS_REPLAY);
127 +       if (journal->j_flags & JBD2_LAZY)
128 +               err = do_one_pass(journal, &info, PASS_JMAP);
129 +       else {
130 +               err = do_one_pass(journal, &info, PASS_SCAN);
131 +               if (!err)
132 +                       err = do_one_pass(journal, &info, PASS_REVOKE);
133 +               if (!err)
134 +                       err = do_one_pass(journal, &info, PASS_REPLAY);
135 +       }
137         jbd_debug(1, "JBD2: recovery, exit status %d, "
138                   "recovered transactions %u to %u\n",
139 @@ -279,10 +290,22 @@ int jbd2_journal_recover(journal_t *journal)
140         jbd_debug(1, "JBD2: Replayed %d and revoked %d/%d blocks\n",
141                   info.nr_replays, info.nr_revoke_hits, info.nr_revokes);
143 -       /* Restart the log at the next transaction ID, thus invalidating
144 -        * any existing commit records in the log. */
145 +       /* Restart the log at the next transaction ID */
146         journal->j_transaction_sequence = info.end_transaction;
148 +       /*
149 +        * In lazy journalling mode, we need to preserve the existing
150 +        * contents of the journal, so set j_head and j_tail
151 +        * accordingly.
152 +        */
153 +       if (journal->j_flags & JBD2_LAZY) {
154 +               if (err)
155 +                       return err;
156 +               journal->j_head = info.head_block;
157 +               journal->j_tail = be32_to_cpu(sb->s_start);
158 +               return 0;
159 +       }
161         jbd2_journal_clear_revoke(journal);
162         err2 = sync_blockdev(journal->j_fs_dev);
163         if (!err)
164 @@ -431,6 +454,7 @@ static int do_one_pass(journal_t *journal,
165         __u32                   crc32_sum = ~0; /* Transactional Checksums */
166         int                     descr_csum_size = 0;
167         int                     block_error = 0;
168 +       int                     new_txn = 1;
170         /*
171          * First thing is to establish what we expect to find in the log
172 @@ -443,7 +467,7 @@ static int do_one_pass(journal_t *journal,
173         next_log_block = be32_to_cpu(sb->s_start);
175         first_commit_ID = next_commit_ID;
176 -       if (pass == PASS_SCAN)
177 +       if (pass == PASS_SCAN || pass == PASS_JMAP)
178                 info->start_transaction = first_commit_ID;
180         jbd_debug(1, "Starting recovery pass %d\n", pass);
181 @@ -468,7 +492,7 @@ static int do_one_pass(journal_t *journal,
182                  * check right now that we haven't gone past the end of
183                  * the log. */
185 -               if (pass != PASS_SCAN)
186 +               if (pass != PASS_SCAN && pass != PASS_JMAP)
187                         if (tid_geq(next_commit_ID, info->end_transaction))
188                                 break;
190 @@ -484,9 +508,6 @@ static int do_one_pass(journal_t *journal,
191                 if (err)
192                         goto failed;
194 -               next_log_block++;
195 -               wrap(journal, next_log_block);
197                 /* What kind of buffer is it?
198                  *
199                  * If it is a descriptor block, check that it has the
200 @@ -510,6 +531,14 @@ static int do_one_pass(journal_t *journal,
201                         break;
202                 }
204 +               if ((pass == PASS_JMAP) && new_txn) {
205 +                       jbd2_add_new_transaction_infos(journal, sequence, next_log_block);
206 +                       new_txn = 0;
207 +               }
209 +               next_log_block++;
210 +               wrap(journal, next_log_block);
212                 /* OK, we have a valid descriptor block which matches
213                  * all of the sequence number checks.  What are we going
214                  * to do with it?  That depends on the pass... */
215 @@ -535,7 +564,7 @@ static int do_one_pass(journal_t *journal,
216                          * in pass REPLAY; if journal_checksums enabled, then
217                          * calculate checksums in PASS_SCAN, otherwise,
218                          * just skip over the blocks it describes. */
219 -                       if (pass != PASS_REPLAY) {
220 +                       if ((pass != PASS_REPLAY) && (pass != PASS_JMAP)) {
221                                 if (pass == PASS_SCAN &&
222                                     jbd2_has_feature_checksum(journal) &&
223                                     !info->end_transaction) {
224 @@ -562,12 +591,28 @@ static int do_one_pass(journal_t *journal,
225                         while ((tagp - bh->b_data + tag_bytes)
226                                <= journal->j_blocksize - descr_csum_size) {
227                                 unsigned long io_block;
228 +                               unsigned long long log_block;
230                                 tag = (journal_block_tag_t *) tagp;
231                                 flags = be16_to_cpu(tag->t_flags);
233                                 io_block = next_log_block++;
234                                 wrap(journal, next_log_block);
235 +                               if (pass == PASS_JMAP) {
236 +                                       struct blk_mapping map;
238 +                                       err = jbd2_journal_bmap(journal,
239 +                                                               io_block,
240 +                                                               &log_block);
241 +                                       if (err)
242 +                                               goto failed;
243 +                                       map.fsblk = read_tag_block(journal, tag);
244 +                                       map.logblk = log_block;
245 +                                       err = jbd2_add_mapping(journal, &map);
246 +                                       if (err)
247 +                                               goto failed;
248 +                                       goto skip_write;
249 +                               }
250                                 err = jread(&obh, journal, io_block);
251                                 if (err) {
252                                         /* Recover what we can, but
253 @@ -753,6 +798,10 @@ static int do_one_pass(journal_t *journal,
254                                         break;
255                                 }
256                         }
257 +                       if (pass == PASS_JMAP) {
258 +                               jbd2_finish_transaction_infos(journal);
259 +                               new_txn = 1;
260 +                       }
261                         brelse(bh);
262                         next_commit_ID++;
263                         continue;
264 @@ -760,12 +809,12 @@ static int do_one_pass(journal_t *journal,
265                 case JBD2_REVOKE_BLOCK:
266                         /* If we aren't in the REVOKE pass, then we can
267                          * just skip over this block. */
268 -                       if (pass != PASS_REVOKE) {
269 +                       if (pass != PASS_REVOKE && pass != PASS_JMAP) {
270                                 brelse(bh);
271                                 continue;
272                         }
274 -                       err = scan_revoke_records(journal, bh,
275 +                       err = scan_revoke_records(journal, bh, pass,
276                                                   next_commit_ID, info);
277                         brelse(bh);
278                         if (err)
279 @@ -788,9 +837,10 @@ static int do_one_pass(journal_t *journal,
280          * transaction marks the end of the valid log.
281          */
283 -       if (pass == PASS_SCAN) {
284 +       if (pass == PASS_SCAN || pass == PASS_JMAP) {
285                 if (!info->end_transaction)
286                         info->end_transaction = next_commit_ID;
287 +               info->head_block = next_log_block;
288         } else {
289                 /* It's really bad news if different passes end up at
290                  * different places (but possible due to IO errors). */
291 @@ -813,7 +863,8 @@ static int do_one_pass(journal_t *journal,
292  /* Scan a revoke record, marking all blocks mentioned as revoked. */
294  static int scan_revoke_records(journal_t *journal, struct buffer_head *bh,
295 -                              tid_t sequence, struct recovery_info *info)
296 +                              enum passtype pass, tid_t sequence,
297 +                              struct recovery_info *info)
299         jbd2_journal_revoke_header_t *header;
300         int offset, max;
301 @@ -839,16 +890,20 @@ static int scan_revoke_records(journal_t *journal, struct buffer_head *bh,
303         while (offset + record_len <= max) {
304                 unsigned long long blocknr;
305 -               int err;
307                 if (record_len == 4)
308                         blocknr = be32_to_cpu(* ((__be32 *) (bh->b_data+offset)));
309                 else
310                         blocknr = be64_to_cpu(* ((__be64 *) (bh->b_data+offset)));
311                 offset += record_len;
312 -               err = jbd2_journal_set_revoke(journal, blocknr, sequence);
313 -               if (err)
314 -                       return err;
315 +               if (pass == PASS_JMAP)
316 +                       jbd2_jmap_revoke(journal, blocknr);
317 +               else {
318 +                       int err = jbd2_journal_set_revoke(journal, blocknr,
319 +                                                         sequence);
320 +                       if (err)
321 +                               return err;
322 +               }
323                 ++info->nr_revokes;
324         }
325         return 0;