fs/ext4/mmp.c

   1 #include <linux/fs.h>
   2 #include <linux/random.h>
   3 #include <linux/buffer_head.h>
   4 #include <linux/utsname.h>
   5 #include <linux/kthread.h>
   6
   7 #include "ext4.h"
   8
   9 /* Checksumming functions */
  10 static __le32 ext4_mmp_csum(struct super_block *sb, struct mmp_struct *mmp)
  11 {
  12         struct ext4_sb_info *sbi = EXT4_SB(sb);
  13         int offset = offsetof(struct mmp_struct, mmp_checksum);
  14         __u32 csum;
  15
  16         csum = ext4_chksum(sbi, sbi->s_csum_seed, (char *)mmp, offset);
  17
  18         return cpu_to_le32(csum);
  19 }
  20
  21 int ext4_mmp_csum_verify(struct super_block *sb, struct mmp_struct *mmp)
  22 {
  23         if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
  24                                        EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
  25                 return 1;
  26
  27         return mmp->mmp_checksum == ext4_mmp_csum(sb, mmp);
  28 }
  29
  30 void ext4_mmp_csum_set(struct super_block *sb, struct mmp_struct *mmp)
  31 {
  32         if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
  33                                        EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
  34                 return;
  35
  36         mmp->mmp_checksum = ext4_mmp_csum(sb, mmp);
  37 }
  38
  39 /*
  40  * Write the MMP block using WRITE_SYNC to try to get the block on-disk
  41  * faster.
  42  */
  43 static int write_mmp_block(struct super_block *sb, struct buffer_head *bh)
  44 {
  45         struct mmp_struct *mmp = (struct mmp_struct *)(bh->b_data);
  46
  47         /*
  48          * We protect against freezing so that we don't create dirty buffers
  49          * on frozen filesystem.
  50          */
  51         sb_start_write(sb);
  52         ext4_mmp_csum_set(sb, mmp);
  53         mark_buffer_dirty(bh);
  54         lock_buffer(bh);
  55         bh->b_end_io = end_buffer_write_sync;
  56         get_bh(bh);
  57         submit_bh(WRITE_SYNC | REQ_META | REQ_PRIO, bh);
  58         wait_on_buffer(bh);
  59         sb_end_write(sb);
  60         if (unlikely(!buffer_uptodate(bh)))
  61                 return 1;
  62
  63         return 0;
  64 }
  65
  66 /*
  67  * Read the MMP block. It _must_ be read from disk and hence we clear the
  68  * uptodate flag on the buffer.
  69  */
  70 static int read_mmp_block(struct super_block *sb, struct buffer_head **bh,
  71                           ext4_fsblk_t mmp_block)
  72 {
  73         struct mmp_struct *mmp;
  74
  75         if (*bh)
  76                 clear_buffer_uptodate(*bh);
  77
  78         /* This would be sb_bread(sb, mmp_block), except we need to be sure
  79          * that the MD RAID device cache has been bypassed, and that the read
  80          * is not blocked in the elevator. */
  81         if (!*bh)
  82                 *bh = sb_getblk(sb, mmp_block);
  83         if (!*bh)
  84                 return -ENOMEM;
  85         if (*bh) {
  86                 get_bh(*bh);
  87                 lock_buffer(*bh);
  88                 (*bh)->b_end_io = end_buffer_read_sync;
  89                 submit_bh(READ_SYNC | REQ_META | REQ_PRIO, *bh);
  90                 wait_on_buffer(*bh);
  91                 if (!buffer_uptodate(*bh)) {
  92                         brelse(*bh);
  93                         *bh = NULL;
  94                 }
  95         }
  96         if (unlikely(!*bh)) {
  97                 ext4_warning(sb, "Error while reading MMP block %llu",
  98                              mmp_block);
  99                 return -EIO;
 100         }
 101
 102         mmp = (struct mmp_struct *)((*bh)->b_data);
 103         if (le32_to_cpu(mmp->mmp_magic) != EXT4_MMP_MAGIC ||
 104             !ext4_mmp_csum_verify(sb, mmp))
 105                 return -EINVAL;
 106
 107         return 0;
 108 }
 109
 110 /*
 111  * Dump as much information as possible to help the admin.
 112  */
 113 void __dump_mmp_msg(struct super_block *sb, struct mmp_struct *mmp,
 114                     const char *function, unsigned int line, const char *msg)
 115 {
 116         __ext4_warning(sb, function, line, msg);
 117         __ext4_warning(sb, function, line,
 118                        "MMP failure info: last update time: %llu, last update "
 119                        "node: %s, last update device: %s\n",
 120                        (long long unsigned int) le64_to_cpu(mmp->mmp_time),
 121                        mmp->mmp_nodename, mmp->mmp_bdevname);
 122 }
 123
 124 /*
 125  * kmmpd will update the MMP sequence every s_mmp_update_interval seconds
 126  */
 127 static int kmmpd(void *data)
 128 {
 129         struct super_block *sb = ((struct mmpd_data *) data)->sb;
 130         struct buffer_head *bh = ((struct mmpd_data *) data)->bh;
 131         struct ext4_super_block *es = EXT4_SB(sb)->s_es;
 132         struct mmp_struct *mmp;
 133         ext4_fsblk_t mmp_block;
 134         u32 seq = 0;
 135         unsigned long failed_writes = 0;
 136         int mmp_update_interval = le16_to_cpu(es->s_mmp_update_interval);
 137         unsigned mmp_check_interval;
 138         unsigned long last_update_time;
 139         unsigned long diff;
 140         int retval;
 141
 142         mmp_block = le64_to_cpu(es->s_mmp_block);
 143         mmp = (struct mmp_struct *)(bh->b_data);
 144         mmp->mmp_time = cpu_to_le64(get_seconds());
 145         /*
 146          * Start with the higher mmp_check_interval and reduce it if
 147          * the MMP block is being updated on time.
 148          */
 149         mmp_check_interval = max(EXT4_MMP_CHECK_MULT * mmp_update_interval,
 150                                  EXT4_MMP_MIN_CHECK_INTERVAL);
 151         mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval);
 152         bdevname(bh->b_bdev, mmp->mmp_bdevname);
 153
 154         memcpy(mmp->mmp_nodename, init_utsname()->nodename,
 155                sizeof(mmp->mmp_nodename));
 156
 157         while (!kthread_should_stop()) {
 158                 if (++seq > EXT4_MMP_SEQ_MAX)
 159                         seq = 1;
 160
 161                 mmp->mmp_seq = cpu_to_le32(seq);
 162                 mmp->mmp_time = cpu_to_le64(get_seconds());
 163                 last_update_time = jiffies;
 164
 165                 retval = write_mmp_block(sb, bh);
 166                 /*
 167                  * Don't spew too many error messages. Print one every
 168                  * (s_mmp_update_interval * 60) seconds.
 169                  */
 170                 if (retval) {
 171                         if ((failed_writes % 60) == 0)
 172                                 ext4_error(sb, "Error writing to MMP block");
 173                         failed_writes++;
 174                 }
 175
 176                 if (!(le32_to_cpu(es->s_feature_incompat) &
 177                     EXT4_FEATURE_INCOMPAT_MMP)) {
 178                         ext4_warning(sb, "kmmpd being stopped since MMP feature"
 179                                      " has been disabled.");
 180                         EXT4_SB(sb)->s_mmp_tsk = NULL;
 181                         goto failed;
 182                 }
 183
 184                 if (sb->s_flags & MS_RDONLY) {
 185                         ext4_warning(sb, "kmmpd being stopped since filesystem "
 186                                      "has been remounted as readonly.");
 187                         EXT4_SB(sb)->s_mmp_tsk = NULL;
 188                         goto failed;
 189                 }
 190
 191                 diff = jiffies - last_update_time;
 192                 if (diff < mmp_update_interval * HZ)
 193                         schedule_timeout_interruptible(mmp_update_interval *
 194                                                        HZ - diff);
 195
 196                 /*
 197                  * We need to make sure that more than mmp_check_interval
 198                  * seconds have not passed since writing. If that has happened
 199                  * we need to check if the MMP block is as we left it.
 200                  */
 201                 diff = jiffies - last_update_time;
 202                 if (diff > mmp_check_interval * HZ) {
 203                         struct buffer_head *bh_check = NULL;
 204                         struct mmp_struct *mmp_check;
 205
 206                         retval = read_mmp_block(sb, &bh_check, mmp_block);
 207                         if (retval) {
 208                                 ext4_error(sb, "error reading MMP data: %d",
 209                                            retval);
 210
 211                                 EXT4_SB(sb)->s_mmp_tsk = NULL;
 212                                 goto failed;
 213                         }
 214
 215                         mmp_check = (struct mmp_struct *)(bh_check->b_data);
 216                         if (mmp->mmp_seq != mmp_check->mmp_seq ||
 217                             memcmp(mmp->mmp_nodename, mmp_check->mmp_nodename,
 218                                    sizeof(mmp->mmp_nodename))) {
 219                                 dump_mmp_msg(sb, mmp_check,
 220                                              "Error while updating MMP info. "
 221                                              "The filesystem seems to have been"
 222                                              " multiply mounted.");
 223                                 ext4_error(sb, "abort");
 224                                 goto failed;
 225                         }
 226                         put_bh(bh_check);
 227                 }
 228
 229                  /*
 230                  * Adjust the mmp_check_interval depending on how much time
 231                  * it took for the MMP block to be written.
 232                  */
 233                 mmp_check_interval = max(min(EXT4_MMP_CHECK_MULT * diff / HZ,
 234                                              EXT4_MMP_MAX_CHECK_INTERVAL),
 235                                          EXT4_MMP_MIN_CHECK_INTERVAL);
 236                 mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval);
 237         }
 238
 239         /*
 240          * Unmount seems to be clean.
 241          */
 242         mmp->mmp_seq = cpu_to_le32(EXT4_MMP_SEQ_CLEAN);
 243         mmp->mmp_time = cpu_to_le64(get_seconds());
 244
 245         retval = write_mmp_block(sb, bh);
 246
 247 failed:
 248         kfree(data);
 249         brelse(bh);
 250         return retval;
 251 }
 252
 253 /*
 254  * Get a random new sequence number but make sure it is not greater than
 255  * EXT4_MMP_SEQ_MAX.
 256  */
 257 static unsigned int mmp_new_seq(void)
 258 {
 259         u32 new_seq;
 260
 261         do {
 262                 get_random_bytes(&new_seq, sizeof(u32));
 263         } while (new_seq > EXT4_MMP_SEQ_MAX);
 264
 265         return new_seq;
 266 }
 267
 268 /*
 269  * Protect the filesystem from being mounted more than once.
 270  */
 271 int ext4_multi_mount_protect(struct super_block *sb,
 272                                     ext4_fsblk_t mmp_block)
 273 {
 274         struct ext4_super_block *es = EXT4_SB(sb)->s_es;
 275         struct buffer_head *bh = NULL;
 276         struct mmp_struct *mmp = NULL;
 277         struct mmpd_data *mmpd_data;
 278         u32 seq;
 279         unsigned int mmp_check_interval = le16_to_cpu(es->s_mmp_update_interval);
 280         unsigned int wait_time = 0;
 281         int retval;
 282
 283         if (mmp_block < le32_to_cpu(es->s_first_data_block) ||
 284             mmp_block >= ext4_blocks_count(es)) {
 285                 ext4_warning(sb, "Invalid MMP block in superblock");
 286                 goto failed;
 287         }
 288
 289         retval = read_mmp_block(sb, &bh, mmp_block);
 290         if (retval)
 291                 goto failed;
 292
 293         mmp = (struct mmp_struct *)(bh->b_data);
 294
 295         if (mmp_check_interval < EXT4_MMP_MIN_CHECK_INTERVAL)
 296                 mmp_check_interval = EXT4_MMP_MIN_CHECK_INTERVAL;
 297
 298         /*
 299          * If check_interval in MMP block is larger, use that instead of
 300          * update_interval from the superblock.
 301          */
 302         if (le16_to_cpu(mmp->mmp_check_interval) > mmp_check_interval)
 303                 mmp_check_interval = le16_to_cpu(mmp->mmp_check_interval);
 304
 305         seq = le32_to_cpu(mmp->mmp_seq);
 306         if (seq == EXT4_MMP_SEQ_CLEAN)
 307                 goto skip;
 308
 309         if (seq == EXT4_MMP_SEQ_FSCK) {
 310                 dump_mmp_msg(sb, mmp, "fsck is running on the filesystem");
 311                 goto failed;
 312         }
 313
 314         wait_time = min(mmp_check_interval * 2 + 1,
 315                         mmp_check_interval + 60);
 316
 317         /* Print MMP interval if more than 20 secs. */
 318         if (wait_time > EXT4_MMP_MIN_CHECK_INTERVAL * 4)
 319                 ext4_warning(sb, "MMP interval %u higher than expected, please"
 320                              " wait.\n", wait_time * 2);
 321
 322         if (schedule_timeout_interruptible(HZ * wait_time) != 0) {
 323                 ext4_warning(sb, "MMP startup interrupted, failing mount\n");
 324                 goto failed;
 325         }
 326
 327         retval = read_mmp_block(sb, &bh, mmp_block);
 328         if (retval)
 329                 goto failed;
 330         mmp = (struct mmp_struct *)(bh->b_data);
 331         if (seq != le32_to_cpu(mmp->mmp_seq)) {
 332                 dump_mmp_msg(sb, mmp,
 333                              "Device is already active on another node.");
 334                 goto failed;
 335         }
 336
 337 skip:
 338         /*
 339          * write a new random sequence number.
 340          */
 341         seq = mmp_new_seq();
 342         mmp->mmp_seq = cpu_to_le32(seq);
 343
 344         retval = write_mmp_block(sb, bh);
 345         if (retval)
 346                 goto failed;
 347
 348         /*
 349          * wait for MMP interval and check mmp_seq.
 350          */
 351         if (schedule_timeout_interruptible(HZ * wait_time) != 0) {
 352                 ext4_warning(sb, "MMP startup interrupted, failing mount\n");
 353                 goto failed;
 354         }
 355
 356         retval = read_mmp_block(sb, &bh, mmp_block);
 357         if (retval)
 358                 goto failed;
 359         mmp = (struct mmp_struct *)(bh->b_data);
 360         if (seq != le32_to_cpu(mmp->mmp_seq)) {
 361                 dump_mmp_msg(sb, mmp,
 362                              "Device is already active on another node.");
 363                 goto failed;
 364         }
 365
 366         mmpd_data = kmalloc(sizeof(struct mmpd_data), GFP_KERNEL);
 367         if (!mmpd_data) {
 368                 ext4_warning(sb, "not enough memory for mmpd_data");
 369                 goto failed;
 370         }
 371         mmpd_data->sb = sb;
 372         mmpd_data->bh = bh;
 373
 374         /*
 375          * Start a kernel thread to update the MMP block periodically.
 376          */
 377         EXT4_SB(sb)->s_mmp_tsk = kthread_run(kmmpd, mmpd_data, "kmmpd-%s",
 378                                              bdevname(bh->b_bdev,
 379                                                       mmp->mmp_bdevname));
 380         if (IS_ERR(EXT4_SB(sb)->s_mmp_tsk)) {
 381                 EXT4_SB(sb)->s_mmp_tsk = NULL;
 382                 kfree(mmpd_data);
 383                 ext4_warning(sb, "Unable to create kmmpd thread for %s.",
 384                              sb->s_id);
 385                 goto failed;
 386         }
 387
 388         return 0;
 389
 390 failed:
 391         brelse(bh);
 392         return 1;
 393 }
 394
 395