fs/ocfs2/blockcheck.c

   1 /* -*- mode: c; c-basic-offset: 8; -*-
   2  * vim: noexpandtab sw=8 ts=8 sts=0:
   3  *
   4  * blockcheck.c
   5  *
   6  * Checksum and ECC codes for the OCFS2 userspace library.
   7  *
   8  * Copyright (C) 2006, 2008 Oracle.  All rights reserved.
   9  *
  10  * This program is free software; you can redistribute it and/or
  11  * modify it under the terms of the GNU General Public
  12  * License, version 2, as published by the Free Software Foundation.
  13  *
  14  * This program is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * General Public License for more details.
  18  */
  19
  20 #include <linux/kernel.h>
  21 #include <linux/types.h>
  22 #include <linux/crc32.h>
  23 #include <linux/buffer_head.h>
  24 #include <linux/bitops.h>
  25 #include <asm/byteorder.h>
  26
  27 #include <cluster/masklog.h>
  28
  29 #include "ocfs2.h"
  30
  31 #include "blockcheck.h"
  32
  33
  34 /*
  35  * We use the following conventions:
  36  *
  37  * d = # data bits
  38  * p = # parity bits
  39  * c = # total code bits (d + p)
  40  */
  41
  42
  43 /*
  44  * Calculate the bit offset in the hamming code buffer based on the bit's
  45  * offset in the data buffer.  Since the hamming code reserves all
  46  * power-of-two bits for parity, the data bit number and the code bit
  47  * number are offest by all the parity bits beforehand.
  48  *
  49  * Recall that bit numbers in hamming code are 1-based.  This function
  50  * takes the 0-based data bit from the caller.
  51  *
  52  * An example.  Take bit 1 of the data buffer.  1 is a power of two (2^0),
  53  * so it's a parity bit.  2 is a power of two (2^1), so it's a parity bit.
  54  * 3 is not a power of two.  So bit 1 of the data buffer ends up as bit 3
  55  * in the code buffer.
  56  *
  57  * The caller can pass in *p if it wants to keep track of the most recent
  58  * number of parity bits added.  This allows the function to start the
  59  * calculation at the last place.
  60  */
  61 static unsigned int calc_code_bit(unsigned int i, unsigned int *p_cache)
  62 {
  63         unsigned int b, p = 0;
  64
  65         /*
  66          * Data bits are 0-based, but we're talking code bits, which
  67          * are 1-based.
  68          */
  69         b = i + 1;
  70
  71         /* Use the cache if it is there */
  72         if (p_cache)
  73                 p = *p_cache;
  74         b += p;
  75
  76         /*
  77          * For every power of two below our bit number, bump our bit.
  78          *
  79          * We compare with (b + 1) because we have to compare with what b
  80          * would be _if_ it were bumped up by the parity bit.  Capice?
  81          *
  82          * p is set above.
  83          */
  84         for (; (1 << p) < (b + 1); p++)
  85                 b++;
  86
  87         if (p_cache)
  88                 *p_cache = p;
  89
  90         return b;
  91 }
  92
  93 /*
  94  * This is the low level encoder function.  It can be called across
  95  * multiple hunks just like the crc32 code.  'd' is the number of bits
  96  * _in_this_hunk_.  nr is the bit offset of this hunk.  So, if you had
  97  * two 512B buffers, you would do it like so:
  98  *
  99  * parity = ocfs2_hamming_encode(0, buf1, 512 * 8, 0);
 100  * parity = ocfs2_hamming_encode(parity, buf2, 512 * 8, 512 * 8);
 101  *
 102  * If you just have one buffer, use ocfs2_hamming_encode_block().
 103  */
 104 u32 ocfs2_hamming_encode(u32 parity, void *data, unsigned int d, unsigned int nr)
 105 {
 106         unsigned int i, b, p = 0;
 107
 108         BUG_ON(!d);
 109
 110         /*
 111          * b is the hamming code bit number.  Hamming code specifies a
 112          * 1-based array, but C uses 0-based.  So 'i' is for C, and 'b' is
 113          * for the algorithm.
 114          *
 115          * The i++ in the for loop is so that the start offset passed
 116          * to ocfs2_find_next_bit_set() is one greater than the previously
 117          * found bit.
 118          */
 119         for (i = 0; (i = ocfs2_find_next_bit(data, d, i)) < d; i++)
 120         {
 121                 /*
 122                  * i is the offset in this hunk, nr + i is the total bit
 123                  * offset.
 124                  */
 125                 b = calc_code_bit(nr + i, &p);
 126
 127                 /*
 128                  * Data bits in the resultant code are checked by
 129                  * parity bits that are part of the bit number
 130                  * representation.  Huh?
 131                  *
 132                  * <wikipedia href="http://en.wikipedia.org/wiki/Hamming_code">
 133                  * In other words, the parity bit at position 2^k
 134                  * checks bits in positions having bit k set in
 135                  * their binary representation.  Conversely, for
 136                  * instance, bit 13, i.e. 1101(2), is checked by
 137                  * bits 1000(2) = 8, 0100(2)=4 and 0001(2) = 1.
 138                  * </wikipedia>
 139                  *
 140                  * Note that 'k' is the _code_ bit number.  'b' in
 141                  * our loop.
 142                  */
 143                 parity ^= b;
 144         }
 145
 146         /* While the data buffer was treated as little endian, the
 147          * return value is in host endian. */
 148         return parity;
 149 }
 150
 151 u32 ocfs2_hamming_encode_block(void *data, unsigned int blocksize)
 152 {
 153         return ocfs2_hamming_encode(0, data, blocksize * 8, 0);
 154 }
 155
 156 /*
 157  * Like ocfs2_hamming_encode(), this can handle hunks.  nr is the bit
 158  * offset of the current hunk.  If bit to be fixed is not part of the
 159  * current hunk, this does nothing.
 160  *
 161  * If you only have one hunk, use ocfs2_hamming_fix_block().
 162  */
 163 void ocfs2_hamming_fix(void *data, unsigned int d, unsigned int nr,
 164                        unsigned int fix)
 165 {
 166         unsigned int i, b;
 167
 168         BUG_ON(!d);
 169
 170         /*
 171          * If the bit to fix has an hweight of 1, it's a parity bit.  One
 172          * busted parity bit is its own error.  Nothing to do here.
 173          */
 174         if (hweight32(fix) == 1)
 175                 return;
 176
 177         /*
 178          * nr + d is the bit right past the data hunk we're looking at.
 179          * If fix after that, nothing to do
 180          */
 181         if (fix >= calc_code_bit(nr + d, NULL))
 182                 return;
 183
 184         /*
 185          * nr is the offset in the data hunk we're starting at.  Let's
 186          * start b at the offset in the code buffer.  See hamming_encode()
 187          * for a more detailed description of 'b'.
 188          */
 189         b = calc_code_bit(nr, NULL);
 190         /* If the fix is before this hunk, nothing to do */
 191         if (fix < b)
 192                 return;
 193
 194         for (i = 0; i < d; i++, b++)
 195         {
 196                 /* Skip past parity bits */
 197                 while (hweight32(b) == 1)
 198                         b++;
 199
 200                 /*
 201                  * i is the offset in this data hunk.
 202                  * nr + i is the offset in the total data buffer.
 203                  * b is the offset in the total code buffer.
 204                  *
 205                  * Thus, when b == fix, bit i in the current hunk needs
 206                  * fixing.
 207                  */
 208                 if (b == fix)
 209                 {
 210                         if (ocfs2_test_bit(i, data))
 211                                 ocfs2_clear_bit(i, data);
 212                         else
 213                                 ocfs2_set_bit(i, data);
 214                         break;
 215                 }
 216         }
 217 }
 218
 219 void ocfs2_hamming_fix_block(void *data, unsigned int blocksize,
 220                              unsigned int fix)
 221 {
 222         ocfs2_hamming_fix(data, blocksize * 8, 0, fix);
 223 }
 224
 225 /*
 226  * This function generates check information for a block.
 227  * data is the block to be checked.  bc is a pointer to the
 228  * ocfs2_block_check structure describing the crc32 and the ecc.
 229  *
 230  * bc should be a pointer inside data, as the function will
 231  * take care of zeroing it before calculating the check information.  If
 232  * bc does not point inside data, the caller must make sure any inline
 233  * ocfs2_block_check structures are zeroed.
 234  *
 235  * The data buffer must be in on-disk endian (little endian for ocfs2).
 236  * bc will be filled with little-endian values and will be ready to go to
 237  * disk.
 238  */
 239 void ocfs2_block_check_compute(void *data, size_t blocksize,
 240                                struct ocfs2_block_check *bc)
 241 {
 242         u32 crc;
 243         u32 ecc;
 244
 245         memset(bc, 0, sizeof(struct ocfs2_block_check));
 246
 247         crc = crc32_le(~0, data, blocksize);
 248         ecc = ocfs2_hamming_encode_block(data, blocksize);
 249
 250         /*
 251          * No ecc'd ocfs2 structure is larger than 4K, so ecc will be no
 252          * larger than 16 bits.
 253          */
 254         BUG_ON(ecc > USHORT_MAX);
 255
 256         bc->bc_crc32e = cpu_to_le32(crc);
 257         bc->bc_ecc = cpu_to_le16((u16)ecc);
 258 }
 259
 260 /*
 261  * This function validates existing check information.  Like _compute,
 262  * the function will take care of zeroing bc before calculating check codes.
 263  * If bc is not a pointer inside data, the caller must have zeroed any
 264  * inline ocfs2_block_check structures.
 265  *
 266  * Again, the data passed in should be the on-disk endian.
 267  */
 268 int ocfs2_block_check_validate(void *data, size_t blocksize,
 269                                struct ocfs2_block_check *bc)
 270 {
 271         int rc = 0;
 272         struct ocfs2_block_check check;
 273         u32 crc, ecc;
 274
 275         check.bc_crc32e = le32_to_cpu(bc->bc_crc32e);
 276         check.bc_ecc = le16_to_cpu(bc->bc_ecc);
 277
 278         memset(bc, 0, sizeof(struct ocfs2_block_check));
 279
 280         /* Fast path - if the crc32 validates, we're good to go */
 281         crc = crc32_le(~0, data, blocksize);
 282         if (crc == check.bc_crc32e)
 283                 goto out;
 284
 285         mlog(ML_ERROR,
 286              "CRC32 failed: stored: %u, computed %u.  Applying ECC.\n",
 287              (unsigned int)check.bc_crc32e, (unsigned int)crc);
 288
 289         /* Ok, try ECC fixups */
 290         ecc = ocfs2_hamming_encode_block(data, blocksize);
 291         ocfs2_hamming_fix_block(data, blocksize, ecc ^ check.bc_ecc);
 292
 293         /* And check the crc32 again */
 294         crc = crc32_le(~0, data, blocksize);
 295         if (crc == check.bc_crc32e)
 296                 goto out;
 297
 298         mlog(ML_ERROR, "Fixed CRC32 failed: stored: %u, computed %u\n",
 299              (unsigned int)check.bc_crc32e, (unsigned int)crc);
 300
 301         rc = -EIO;
 302
 303 out:
 304         bc->bc_crc32e = cpu_to_le32(check.bc_crc32e);
 305         bc->bc_ecc = cpu_to_le16(check.bc_ecc);
 306
 307         return rc;
 308 }
 309
 310 /*
 311  * This function generates check information for a list of buffer_heads.
 312  * bhs is the blocks to be checked.  bc is a pointer to the
 313  * ocfs2_block_check structure describing the crc32 and the ecc.
 314  *
 315  * bc should be a pointer inside data, as the function will
 316  * take care of zeroing it before calculating the check information.  If
 317  * bc does not point inside data, the caller must make sure any inline
 318  * ocfs2_block_check structures are zeroed.
 319  *
 320  * The data buffer must be in on-disk endian (little endian for ocfs2).
 321  * bc will be filled with little-endian values and will be ready to go to
 322  * disk.
 323  */
 324 void ocfs2_block_check_compute_bhs(struct buffer_head **bhs, int nr,
 325                                    struct ocfs2_block_check *bc)
 326 {
 327         int i;
 328         u32 crc, ecc;
 329
 330         BUG_ON(nr < 0);
 331
 332         if (!nr)
 333                 return;
 334
 335         memset(bc, 0, sizeof(struct ocfs2_block_check));
 336
 337         for (i = 0, crc = ~0, ecc = 0; i < nr; i++) {
 338                 crc = crc32_le(crc, bhs[i]->b_data, bhs[i]->b_size);
 339                 /*
 340                  * The number of bits in a buffer is obviously b_size*8.
 341                  * The offset of this buffer is b_size*i, so the bit offset
 342                  * of this buffer is b_size*8*i.
 343                  */
 344                 ecc = (u16)ocfs2_hamming_encode(ecc, bhs[i]->b_data,
 345                                                 bhs[i]->b_size * 8,
 346                                                 bhs[i]->b_size * 8 * i);
 347         }
 348
 349         /*
 350          * No ecc'd ocfs2 structure is larger than 4K, so ecc will be no
 351          * larger than 16 bits.
 352          */
 353         BUG_ON(ecc > USHORT_MAX);
 354
 355         bc->bc_crc32e = cpu_to_le32(crc);
 356         bc->bc_ecc = cpu_to_le16((u16)ecc);
 357 }
 358
 359 /*
 360  * This function validates existing check information on a list of
 361  * buffer_heads.  Like _compute_bhs, the function will take care of
 362  * zeroing bc before calculating check codes.  If bc is not a pointer
 363  * inside data, the caller must have zeroed any inline
 364  * ocfs2_block_check structures.
 365  *
 366  * Again, the data passed in should be the on-disk endian.
 367  */
 368 int ocfs2_block_check_validate_bhs(struct buffer_head **bhs, int nr,
 369                                    struct ocfs2_block_check *bc)
 370 {
 371         int i, rc = 0;
 372         struct ocfs2_block_check check;
 373         u32 crc, ecc, fix;
 374
 375         BUG_ON(nr < 0);
 376
 377         if (!nr)
 378                 return 0;
 379
 380         check.bc_crc32e = le32_to_cpu(bc->bc_crc32e);
 381         check.bc_ecc = le16_to_cpu(bc->bc_ecc);
 382
 383         memset(bc, 0, sizeof(struct ocfs2_block_check));
 384
 385         /* Fast path - if the crc32 validates, we're good to go */
 386         for (i = 0, crc = ~0; i < nr; i++)
 387                 crc = crc32_le(crc, bhs[i]->b_data, bhs[i]->b_size);
 388         if (crc == check.bc_crc32e)
 389                 goto out;
 390
 391         mlog(ML_ERROR,
 392              "CRC32 failed: stored: %u, computed %u.  Applying ECC.\n",
 393              (unsigned int)check.bc_crc32e, (unsigned int)crc);
 394
 395         /* Ok, try ECC fixups */
 396         for (i = 0, ecc = 0; i < nr; i++) {
 397                 /*
 398                  * The number of bits in a buffer is obviously b_size*8.
 399                  * The offset of this buffer is b_size*i, so the bit offset
 400                  * of this buffer is b_size*8*i.
 401                  */
 402                 ecc = (u16)ocfs2_hamming_encode(ecc, bhs[i]->b_data,
 403                                                 bhs[i]->b_size * 8,
 404                                                 bhs[i]->b_size * 8 * i);
 405         }
 406         fix = ecc ^ check.bc_ecc;
 407         for (i = 0; i < nr; i++) {
 408                 /*
 409                  * Try the fix against each buffer.  It will only affect
 410                  * one of them.
 411                  */
 412                 ocfs2_hamming_fix(bhs[i]->b_data, bhs[i]->b_size * 8,
 413                                   bhs[i]->b_size * 8 * i, fix);
 414         }
 415
 416         /* And check the crc32 again */
 417         for (i = 0, crc = ~0; i < nr; i++)
 418                 crc = crc32_le(crc, bhs[i]->b_data, bhs[i]->b_size);
 419         if (crc == check.bc_crc32e)
 420                 goto out;
 421
 422         mlog(ML_ERROR, "Fixed CRC32 failed: stored: %u, computed %u\n",
 423              (unsigned int)check.bc_crc32e, (unsigned int)crc);
 424
 425         rc = -EIO;
 426
 427 out:
 428         bc->bc_crc32e = cpu_to_le32(check.bc_crc32e);
 429         bc->bc_ecc = cpu_to_le16(check.bc_ecc);
 430
 431         return rc;
 432 }
 433
 434 /*
 435  * These are the main API.  They check the superblock flag before
 436  * calling the underlying operations.
 437  *
 438  * They expect the buffer(s) to be in disk format.
 439  */
 440 void ocfs2_compute_meta_ecc(struct super_block *sb, void *data,
 441                             struct ocfs2_block_check *bc)
 442 {
 443         if (ocfs2_meta_ecc(OCFS2_SB(sb)))
 444                 ocfs2_block_check_compute(data, sb->s_blocksize, bc);
 445 }
 446
 447 int ocfs2_validate_meta_ecc(struct super_block *sb, void *data,
 448                             struct ocfs2_block_check *bc)
 449 {
 450         int rc = 0;
 451
 452         if (ocfs2_meta_ecc(OCFS2_SB(sb)))
 453                 rc = ocfs2_block_check_validate(data, sb->s_blocksize, bc);
 454
 455         return rc;
 456 }
 457
 458 void ocfs2_compute_meta_ecc_bhs(struct super_block *sb,
 459                                 struct buffer_head **bhs, int nr,
 460                                 struct ocfs2_block_check *bc)
 461 {
 462         if (ocfs2_meta_ecc(OCFS2_SB(sb)))
 463                 ocfs2_block_check_compute_bhs(bhs, nr, bc);
 464 }
 465
 466 int ocfs2_validate_meta_ecc_bhs(struct super_block *sb,
 467                                 struct buffer_head **bhs, int nr,
 468                                 struct ocfs2_block_check *bc)
 469 {
 470         int rc = 0;
 471
 472         if (ocfs2_meta_ecc(OCFS2_SB(sb)))
 473                 rc = ocfs2_block_check_validate_bhs(bhs, nr, bc);
 474
 475         return rc;
 476 }
 477