release/src-rt-6.x.4708/linux/linux-2.6.36/drivers/block/drbd/drbd_vli.h

   1 /*
   2 -*- linux-c -*-
   3    drbd_receiver.c
   4    This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
   5
   6    Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
   7    Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
   8    Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
   9
  10    drbd is free software; you can redistribute it and/or modify
  11    it under the terms of the GNU General Public License as published by
  12    the Free Software Foundation; either version 2, or (at your option)
  13    any later version.
  14
  15    drbd is distributed in the hope that it will be useful,
  16    but WITHOUT ANY WARRANTY; without even the implied warranty of
  17    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  18    GNU General Public License for more details.
  19
  20    You should have received a copy of the GNU General Public License
  21    along with drbd; see the file COPYING.  If not, write to
  22    the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
  23  */
  24
  25 #ifndef _DRBD_VLI_H
  26 #define _DRBD_VLI_H
  27
  28 /*
  29  * At a granularity of 4KiB storage represented per bit,
  30  * and stroage sizes of several TiB,
  31  * and possibly small-bandwidth replication,
  32  * the bitmap transfer time can take much too long,
  33  * if transmitted in plain text.
  34  *
  35  * We try to reduce the transfered bitmap information
  36  * by encoding runlengths of bit polarity.
  37  *
  38  * We never actually need to encode a "zero" (runlengths are positive).
  39  * But then we have to store the value of the first bit.
  40  * The first bit of information thus shall encode if the first runlength
  41  * gives the number of set or unset bits.
  42  *
  43  * We assume that large areas are either completely set or unset,
  44  * which gives good compression with any runlength method,
  45  * even when encoding the runlength as fixed size 32bit/64bit integers.
  46  *
  47  * Still, there may be areas where the polarity flips every few bits,
  48  * and encoding the runlength sequence of those areas with fix size
  49  * integers would be much worse than plaintext.
  50  *
  51  * We want to encode small runlength values with minimum code length,
  52  * while still being able to encode a Huge run of all zeros.
  53  *
  54  * Thus we need a Variable Length Integer encoding, VLI.
  55  *
  56  * For some cases, we produce more code bits than plaintext input.
  57  * We need to send incompressible chunks as plaintext, skip over them
  58  * and then see if the next chunk compresses better.
  59  *
  60  * We don't care too much about "excellent" compression ratio for large
  61  * runlengths (all set/all clear): whether we achieve a factor of 100
  62  * or 1000 is not that much of an issue.
  63  * We do not want to waste too much on short runlengths in the "noisy"
  64  * parts of the bitmap, though.
  65  *
  66  * There are endless variants of VLI, we experimented with:
  67  *  * simple byte-based
  68  *  * various bit based with different code word length.
  69  *
  70  * To avoid yet an other configuration parameter (choice of bitmap compression
  71  * algorithm) which was difficult to explain and tune, we just chose the one
  72  * variant that turned out best in all test cases.
  73  * Based on real world usage patterns, with device sizes ranging from a few GiB
  74  * to several TiB, file server/mailserver/webserver/mysql/postgress,
  75  * mostly idle to really busy, the all time winner (though sometimes only
  76  * marginally better) is:
  77  */
  78
  79
  80 /* compression "table":
  81  transmitted   x                                0.29
  82  as plaintext x                                  ........................
  83              x                                   ........................
  84             x                                    ........................
  85            x    0.59                         0.21........................
  86           x      ........................................................
  87          x       .. c ...................................................
  88         x    0.44.. o ...................................................
  89        x .......... d ...................................................
  90       x  .......... e ...................................................
  91      X.............   ...................................................
  92     x.............. b ...................................................
  93 2.0x............... i ...................................................
  94  #X................ t ...................................................
  95  #................. s ...........................  plain bits  ..........
  96 -+-----------------------------------------------------------------------
  97  1             16              32                              64
  98 */
  99
 100 /* LEVEL: (total bits, prefix bits, prefix value),
 101  * sorted ascending by number of total bits.
 102  * The rest of the code table is calculated at compiletime from this. */
 103
 104 /* fibonacci data 1, 1, ... */
 105 #define VLI_L_1_1() do { \
 106         LEVEL( 2, 1, 0x00); \
 107         LEVEL( 3, 2, 0x01); \
 108         LEVEL( 5, 3, 0x03); \
 109         LEVEL( 7, 4, 0x07); \
 110         LEVEL(10, 5, 0x0f); \
 111         LEVEL(14, 6, 0x1f); \
 112         LEVEL(21, 8, 0x3f); \
 113         LEVEL(29, 8, 0x7f); \
 114         LEVEL(42, 8, 0xbf); \
 115         LEVEL(64, 8, 0xff); \
 116         } while (0)
 117
 118 /* finds a suitable level to decode the least significant part of in.
 119  * returns number of bits consumed.
 120  *
 121  * BUG() for bad input, as that would mean a buggy code table. */
 122 static inline int vli_decode_bits(u64 *out, const u64 in)
 123 {
 124         u64 adj = 1;
 125
 126 #define LEVEL(t,b,v)                                    \
 127         do {                                            \
 128                 if ((in & ((1 << b) -1)) == v) {        \
 129                         *out = ((in & ((~0ULL) >> (64-t))) >> b) + adj; \
 130                         return t;                       \
 131                 }                                       \
 132                 adj += 1ULL << (t - b);                 \
 133         } while (0)
 134
 135         VLI_L_1_1();
 136
 137         /* NOT REACHED, if VLI_LEVELS code table is defined properly */
 138         BUG();
 139 #undef LEVEL
 140 }
 141
 142 /* return number of code bits needed,
 143  * or negative error number */
 144 static inline int __vli_encode_bits(u64 *out, const u64 in)
 145 {
 146         u64 max = 0;
 147         u64 adj = 1;
 148
 149         if (in == 0)
 150                 return -EINVAL;
 151
 152 #define LEVEL(t,b,v) do {               \
 153                 max += 1ULL << (t - b); \
 154                 if (in <= max) {        \
 155                         if (out)        \
 156                                 *out = ((in - adj) << b) | v;   \
 157                         return t;       \
 158                 }                       \
 159                 adj = max + 1;          \
 160         } while (0)
 161
 162         VLI_L_1_1();
 163
 164         return -EOVERFLOW;
 165 #undef LEVEL
 166 }
 167
 168 #undef VLI_L_1_1
 169
 170 /* code from here down is independend of actually used bit code */
 171
 172 /*
 173  * Code length is determined by some unique (e.g. unary) prefix.
 174  * This encodes arbitrary bit length, not whole bytes: we have a bit-stream,
 175  * not a byte stream.
 176  */
 177
 178 /* for the bitstream, we need a cursor */
 179 struct bitstream_cursor {
 180         /* the current byte */
 181         u8 *b;
 182         /* the current bit within *b, nomalized: 0..7 */
 183         unsigned int bit;
 184 };
 185
 186 /* initialize cursor to point to first bit of stream */
 187 static inline void bitstream_cursor_reset(struct bitstream_cursor *cur, void *s)
 188 {
 189         cur->b = s;
 190         cur->bit = 0;
 191 }
 192
 193 /* advance cursor by that many bits; maximum expected input value: 64,
 194  * but depending on VLI implementation, it may be more. */
 195 static inline void bitstream_cursor_advance(struct bitstream_cursor *cur, unsigned int bits)
 196 {
 197         bits += cur->bit;
 198         cur->b = cur->b + (bits >> 3);
 199         cur->bit = bits & 7;
 200 }
 201
 202 /* the bitstream itself knows its length */
 203 struct bitstream {
 204         struct bitstream_cursor cur;
 205         unsigned char *buf;
 206         size_t buf_len;         /* in bytes */
 207
 208         /* for input stream:
 209          * number of trailing 0 bits for padding
 210          * total number of valid bits in stream: buf_len * 8 - pad_bits */
 211         unsigned int pad_bits;
 212 };
 213
 214 static inline void bitstream_init(struct bitstream *bs, void *s, size_t len, unsigned int pad_bits)
 215 {
 216         bs->buf = s;
 217         bs->buf_len = len;
 218         bs->pad_bits = pad_bits;
 219         bitstream_cursor_reset(&bs->cur, bs->buf);
 220 }
 221
 222 static inline void bitstream_rewind(struct bitstream *bs)
 223 {
 224         bitstream_cursor_reset(&bs->cur, bs->buf);
 225         memset(bs->buf, 0, bs->buf_len);
 226 }
 227
 228 /* Put (at most 64) least significant bits of val into bitstream, and advance cursor.
 229  * Ignores "pad_bits".
 230  * Returns zero if bits == 0 (nothing to do).
 231  * Returns number of bits used if successful.
 232  *
 233  * If there is not enough room left in bitstream,
 234  * leaves bitstream unchanged and returns -ENOBUFS.
 235  */
 236 static inline int bitstream_put_bits(struct bitstream *bs, u64 val, const unsigned int bits)
 237 {
 238         unsigned char *b = bs->cur.b;
 239         unsigned int tmp;
 240
 241         if (bits == 0)
 242                 return 0;
 243
 244         if ((bs->cur.b + ((bs->cur.bit + bits -1) >> 3)) - bs->buf >= bs->buf_len)
 245                 return -ENOBUFS;
 246
 247         /* paranoia: strip off hi bits; they should not be set anyways. */
 248         if (bits < 64)
 249                 val &= ~0ULL >> (64 - bits);
 250
 251         *b++ |= (val & 0xff) << bs->cur.bit;
 252
 253         for (tmp = 8 - bs->cur.bit; tmp < bits; tmp += 8)
 254                 *b++ |= (val >> tmp) & 0xff;
 255
 256         bitstream_cursor_advance(&bs->cur, bits);
 257         return bits;
 258 }
 259
 260 /* Fetch (at most 64) bits from bitstream into *out, and advance cursor.
 261  *
 262  * If more than 64 bits are requested, returns -EINVAL and leave *out unchanged.
 263  *
 264  * If there are less than the requested number of valid bits left in the
 265  * bitstream, still fetches all available bits.
 266  *
 267  * Returns number of actually fetched bits.
 268  */
 269 static inline int bitstream_get_bits(struct bitstream *bs, u64 *out, int bits)
 270 {
 271         u64 val;
 272         unsigned int n;
 273
 274         if (bits > 64)
 275                 return -EINVAL;
 276
 277         if (bs->cur.b + ((bs->cur.bit + bs->pad_bits + bits -1) >> 3) - bs->buf >= bs->buf_len)
 278                 bits = ((bs->buf_len - (bs->cur.b - bs->buf)) << 3)
 279                         - bs->cur.bit - bs->pad_bits;
 280
 281         if (bits == 0) {
 282                 *out = 0;
 283                 return 0;
 284         }
 285
 286         /* get the high bits */
 287         val = 0;
 288         n = (bs->cur.bit + bits + 7) >> 3;
 289         /* n may be at most 9, if cur.bit + bits > 64 */
 290         /* which means this copies at most 8 byte */
 291         if (n) {
 292                 memcpy(&val, bs->cur.b+1, n - 1);
 293                 val = le64_to_cpu(val) << (8 - bs->cur.bit);
 294         }
 295
 296         /* we still need the low bits */
 297         val |= bs->cur.b[0] >> bs->cur.bit;
 298
 299         /* and mask out bits we don't want */
 300         val &= ~0ULL >> (64 - bits);
 301
 302         bitstream_cursor_advance(&bs->cur, bits);
 303         *out = val;
 304
 305         return bits;
 306 }
 307
 308 /* encodes @in as vli into @bs;
 309
 310  * return values
 311  *  > 0: number of bits successfully stored in bitstream
 312  * -ENOBUFS @bs is full
 313  * -EINVAL input zero (invalid)
 314  * -EOVERFLOW input too large for this vli code (invalid)
 315  */
 316 static inline int vli_encode_bits(struct bitstream *bs, u64 in)
 317 {
 318         u64 code = code;
 319         int bits = __vli_encode_bits(&code, in);
 320
 321         if (bits <= 0)
 322                 return bits;
 323
 324         return bitstream_put_bits(bs, code, bits);
 325 }
 326
 327 #endif