drivers/md/dm-snap-persistent.c

   1 /*
   2  * Copyright (C) 2001-2002 Sistina Software (UK) Limited.
   3  * Copyright (C) 2006-2008 Red Hat GmbH
   4  *
   5  * This file is released under the GPL.
   6  */
   7
   8 #include "dm-exception-store.h"
   9
  10 #include <linux/mm.h>
  11 #include <linux/pagemap.h>
  12 #include <linux/vmalloc.h>
  13 #include <linux/slab.h>
  14 #include <linux/dm-io.h>
  15
  16 #define DM_MSG_PREFIX "persistent snapshot"
  17 #define DM_CHUNK_SIZE_DEFAULT_SECTORS 32        /* 16KB */
  18
  19 /*-----------------------------------------------------------------
  20  * Persistent snapshots, by persistent we mean that the snapshot
  21  * will survive a reboot.
  22  *---------------------------------------------------------------*/
  23
  24 /*
  25  * We need to store a record of which parts of the origin have
  26  * been copied to the snapshot device.  The snapshot code
  27  * requires that we copy exception chunks to chunk aligned areas
  28  * of the COW store.  It makes sense therefore, to store the
  29  * metadata in chunk size blocks.
  30  *
  31  * There is no backward or forward compatibility implemented,
  32  * snapshots with different disk versions than the kernel will
  33  * not be usable.  It is expected that "lvcreate" will blank out
  34  * the start of a fresh COW device before calling the snapshot
  35  * constructor.
  36  *
  37  * The first chunk of the COW device just contains the header.
  38  * After this there is a chunk filled with exception metadata,
  39  * followed by as many exception chunks as can fit in the
  40  * metadata areas.
  41  *
  42  * All on disk structures are in little-endian format.  The end
  43  * of the exceptions info is indicated by an exception with a
  44  * new_chunk of 0, which is invalid since it would point to the
  45  * header chunk.
  46  */
  47
  48 /*
  49  * Magic for persistent snapshots: "SnAp" - Feeble isn't it.
  50  */
  51 #define SNAP_MAGIC 0x70416e53
  52
  53 /*
  54  * The on-disk version of the metadata.
  55  */
  56 #define SNAPSHOT_DISK_VERSION 1
  57
  58 #define NUM_SNAPSHOT_HDR_CHUNKS 1
  59
  60 struct disk_header {
  61         __le32 magic;
  62
  63         /*
  64          * Is this snapshot valid.  There is no way of recovering
  65          * an invalid snapshot.
  66          */
  67         __le32 valid;
  68
  69         /*
  70          * Simple, incrementing version. no backward
  71          * compatibility.
  72          */
  73         __le32 version;
  74
  75         /* In sectors */
  76         __le32 chunk_size;
  77 } __packed;
  78
  79 struct disk_exception {
  80         __le64 old_chunk;
  81         __le64 new_chunk;
  82 } __packed;
  83
  84 struct core_exception {
  85         uint64_t old_chunk;
  86         uint64_t new_chunk;
  87 };
  88
  89 struct commit_callback {
  90         void (*callback)(void *, int success);
  91         void *context;
  92 };
  93
  94 /*
  95  * The top level structure for a persistent exception store.
  96  */
  97 struct pstore {
  98         struct dm_exception_store *store;
  99         int version;
 100         int valid;
 101         uint32_t exceptions_per_area;
 102
 103         /*
 104          * Now that we have an asynchronous kcopyd there is no
 105          * need for large chunk sizes, so it wont hurt to have a
 106          * whole chunks worth of metadata in memory at once.
 107          */
 108         void *area;
 109
 110         /*
 111          * An area of zeros used to clear the next area.
 112          */
 113         void *zero_area;
 114
 115         /*
 116          * An area used for header. The header can be written
 117          * concurrently with metadata (when invalidating the snapshot),
 118          * so it needs a separate buffer.
 119          */
 120         void *header_area;
 121
 122         /*
 123          * Used to keep track of which metadata area the data in
 124          * 'chunk' refers to.
 125          */
 126         chunk_t current_area;
 127
 128         /*
 129          * The next free chunk for an exception.
 130          *
 131          * When creating exceptions, all the chunks here and above are
 132          * free.  It holds the next chunk to be allocated.  On rare
 133          * occasions (e.g. after a system crash) holes can be left in
 134          * the exception store because chunks can be committed out of
 135          * order.
 136          *
 137          * When merging exceptions, it does not necessarily mean all the
 138          * chunks here and above are free.  It holds the value it would
 139          * have held if all chunks had been committed in order of
 140          * allocation.  Consequently the value may occasionally be
 141          * slightly too low, but since it's only used for 'status' and
 142          * it can never reach its minimum value too early this doesn't
 143          * matter.
 144          */
 145
 146         chunk_t next_free;
 147
 148         /*
 149          * The index of next free exception in the current
 150          * metadata area.
 151          */
 152         uint32_t current_committed;
 153
 154         atomic_t pending_count;
 155         uint32_t callback_count;
 156         struct commit_callback *callbacks;
 157         struct dm_io_client *io_client;
 158
 159         struct workqueue_struct *metadata_wq;
 160 };
 161
 162 static int alloc_area(struct pstore *ps)
 163 {
 164         int r = -ENOMEM;
 165         size_t len;
 166
 167         len = ps->store->chunk_size << SECTOR_SHIFT;
 168
 169         /*
 170          * Allocate the chunk_size block of memory that will hold
 171          * a single metadata area.
 172          */
 173         ps->area = vmalloc(len);
 174         if (!ps->area)
 175                 goto err_area;
 176
 177         ps->zero_area = vzalloc(len);
 178         if (!ps->zero_area)
 179                 goto err_zero_area;
 180
 181         ps->header_area = vmalloc(len);
 182         if (!ps->header_area)
 183                 goto err_header_area;
 184
 185         return 0;
 186
 187 err_header_area:
 188         vfree(ps->zero_area);
 189
 190 err_zero_area:
 191         vfree(ps->area);
 192
 193 err_area:
 194         return r;
 195 }
 196
 197 static void free_area(struct pstore *ps)
 198 {
 199         if (ps->area)
 200                 vfree(ps->area);
 201         ps->area = NULL;
 202
 203         if (ps->zero_area)
 204                 vfree(ps->zero_area);
 205         ps->zero_area = NULL;
 206
 207         if (ps->header_area)
 208                 vfree(ps->header_area);
 209         ps->header_area = NULL;
 210 }
 211
 212 struct mdata_req {
 213         struct dm_io_region *where;
 214         struct dm_io_request *io_req;
 215         struct work_struct work;
 216         int result;
 217 };
 218
 219 static void do_metadata(struct work_struct *work)
 220 {
 221         struct mdata_req *req = container_of(work, struct mdata_req, work);
 222
 223         req->result = dm_io(req->io_req, 1, req->where, NULL);
 224 }
 225
 226 /*
 227  * Read or write a chunk aligned and sized block of data from a device.
 228  */
 229 static int chunk_io(struct pstore *ps, void *area, chunk_t chunk, int rw,
 230                     int metadata)
 231 {
 232         struct dm_io_region where = {
 233                 .bdev = dm_snap_cow(ps->store->snap)->bdev,
 234                 .sector = ps->store->chunk_size * chunk,
 235                 .count = ps->store->chunk_size,
 236         };
 237         struct dm_io_request io_req = {
 238                 .bi_rw = rw,
 239                 .mem.type = DM_IO_VMA,
 240                 .mem.ptr.vma = area,
 241                 .client = ps->io_client,
 242                 .notify.fn = NULL,
 243         };
 244         struct mdata_req req;
 245
 246         if (!metadata)
 247                 return dm_io(&io_req, 1, &where, NULL);
 248
 249         req.where = &where;
 250         req.io_req = &io_req;
 251
 252         /*
 253          * Issue the synchronous I/O from a different thread
 254          * to avoid generic_make_request recursion.
 255          */
 256         INIT_WORK_ONSTACK(&req.work, do_metadata);
 257         queue_work(ps->metadata_wq, &req.work);
 258         flush_work(&req.work);
 259
 260         return req.result;
 261 }
 262
 263 /*
 264  * Convert a metadata area index to a chunk index.
 265  */
 266 static chunk_t area_location(struct pstore *ps, chunk_t area)
 267 {
 268         return NUM_SNAPSHOT_HDR_CHUNKS + ((ps->exceptions_per_area + 1) * area);
 269 }
 270
 271 /*
 272  * Read or write a metadata area.  Remembering to skip the first
 273  * chunk which holds the header.
 274  */
 275 static int area_io(struct pstore *ps, int rw)
 276 {
 277         int r;
 278         chunk_t chunk;
 279
 280         chunk = area_location(ps, ps->current_area);
 281
 282         r = chunk_io(ps, ps->area, chunk, rw, 0);
 283         if (r)
 284                 return r;
 285
 286         return 0;
 287 }
 288
 289 static void zero_memory_area(struct pstore *ps)
 290 {
 291         memset(ps->area, 0, ps->store->chunk_size << SECTOR_SHIFT);
 292 }
 293
 294 static int zero_disk_area(struct pstore *ps, chunk_t area)
 295 {
 296         return chunk_io(ps, ps->zero_area, area_location(ps, area), WRITE, 0);
 297 }
 298
 299 static int read_header(struct pstore *ps, int *new_snapshot)
 300 {
 301         int r;
 302         struct disk_header *dh;
 303         unsigned chunk_size;
 304         int chunk_size_supplied = 1;
 305         char *chunk_err;
 306
 307         /*
 308          * Use default chunk size (or logical_block_size, if larger)
 309          * if none supplied
 310          */
 311         if (!ps->store->chunk_size) {
 312                 ps->store->chunk_size = max(DM_CHUNK_SIZE_DEFAULT_SECTORS,
 313                     bdev_logical_block_size(dm_snap_cow(ps->store->snap)->
 314                                             bdev) >> 9);
 315                 ps->store->chunk_mask = ps->store->chunk_size - 1;
 316                 ps->store->chunk_shift = ffs(ps->store->chunk_size) - 1;
 317                 chunk_size_supplied = 0;
 318         }
 319
 320         ps->io_client = dm_io_client_create();
 321         if (IS_ERR(ps->io_client))
 322                 return PTR_ERR(ps->io_client);
 323
 324         r = alloc_area(ps);
 325         if (r)
 326                 return r;
 327
 328         r = chunk_io(ps, ps->header_area, 0, READ, 1);
 329         if (r)
 330                 goto bad;
 331
 332         dh = ps->header_area;
 333
 334         if (le32_to_cpu(dh->magic) == 0) {
 335                 *new_snapshot = 1;
 336                 return 0;
 337         }
 338
 339         if (le32_to_cpu(dh->magic) != SNAP_MAGIC) {
 340                 DMWARN("Invalid or corrupt snapshot");
 341                 r = -ENXIO;
 342                 goto bad;
 343         }
 344
 345         *new_snapshot = 0;
 346         ps->valid = le32_to_cpu(dh->valid);
 347         ps->version = le32_to_cpu(dh->version);
 348         chunk_size = le32_to_cpu(dh->chunk_size);
 349
 350         if (ps->store->chunk_size == chunk_size)
 351                 return 0;
 352
 353         if (chunk_size_supplied)
 354                 DMWARN("chunk size %u in device metadata overrides "
 355                        "table chunk size of %u.",
 356                        chunk_size, ps->store->chunk_size);
 357
 358         /* We had a bogus chunk_size. Fix stuff up. */
 359         free_area(ps);
 360
 361         r = dm_exception_store_set_chunk_size(ps->store, chunk_size,
 362                                               &chunk_err);
 363         if (r) {
 364                 DMERR("invalid on-disk chunk size %u: %s.",
 365                       chunk_size, chunk_err);
 366                 return r;
 367         }
 368
 369         r = alloc_area(ps);
 370         return r;
 371
 372 bad:
 373         free_area(ps);
 374         return r;
 375 }
 376
 377 static int write_header(struct pstore *ps)
 378 {
 379         struct disk_header *dh;
 380
 381         memset(ps->header_area, 0, ps->store->chunk_size << SECTOR_SHIFT);
 382
 383         dh = ps->header_area;
 384         dh->magic = cpu_to_le32(SNAP_MAGIC);
 385         dh->valid = cpu_to_le32(ps->valid);
 386         dh->version = cpu_to_le32(ps->version);
 387         dh->chunk_size = cpu_to_le32(ps->store->chunk_size);
 388
 389         return chunk_io(ps, ps->header_area, 0, WRITE, 1);
 390 }
 391
 392 /*
 393  * Access functions for the disk exceptions, these do the endian conversions.
 394  */
 395 static struct disk_exception *get_exception(struct pstore *ps, uint32_t index)
 396 {
 397         BUG_ON(index >= ps->exceptions_per_area);
 398
 399         return ((struct disk_exception *) ps->area) + index;
 400 }
 401
 402 static void read_exception(struct pstore *ps,
 403                            uint32_t index, struct core_exception *result)
 404 {
 405         struct disk_exception *de = get_exception(ps, index);
 406
 407         /* copy it */
 408         result->old_chunk = le64_to_cpu(de->old_chunk);
 409         result->new_chunk = le64_to_cpu(de->new_chunk);
 410 }
 411
 412 static void write_exception(struct pstore *ps,
 413                             uint32_t index, struct core_exception *e)
 414 {
 415         struct disk_exception *de = get_exception(ps, index);
 416
 417         /* copy it */
 418         de->old_chunk = cpu_to_le64(e->old_chunk);
 419         de->new_chunk = cpu_to_le64(e->new_chunk);
 420 }
 421
 422 static void clear_exception(struct pstore *ps, uint32_t index)
 423 {
 424         struct disk_exception *de = get_exception(ps, index);
 425
 426         /* clear it */
 427         de->old_chunk = 0;
 428         de->new_chunk = 0;
 429 }
 430
 431 /*
 432  * Registers the exceptions that are present in the current area.
 433  * 'full' is filled in to indicate if the area has been
 434  * filled.
 435  */
 436 static int insert_exceptions(struct pstore *ps,
 437                              int (*callback)(void *callback_context,
 438                                              chunk_t old, chunk_t new),
 439                              void *callback_context,
 440                              int *full)
 441 {
 442         int r;
 443         unsigned int i;
 444         struct core_exception e;
 445
 446         /* presume the area is full */
 447         *full = 1;
 448
 449         for (i = 0; i < ps->exceptions_per_area; i++) {
 450                 read_exception(ps, i, &e);
 451
 452                 /*
 453                  * If the new_chunk is pointing at the start of
 454                  * the COW device, where the first metadata area
 455                  * is we know that we've hit the end of the
 456                  * exceptions.  Therefore the area is not full.
 457                  */
 458                 if (e.new_chunk == 0LL) {
 459                         ps->current_committed = i;
 460                         *full = 0;
 461                         break;
 462                 }
 463
 464                 /*
 465                  * Keep track of the start of the free chunks.
 466                  */
 467                 if (ps->next_free <= e.new_chunk)
 468                         ps->next_free = e.new_chunk + 1;
 469
 470                 /*
 471                  * Otherwise we add the exception to the snapshot.
 472                  */
 473                 r = callback(callback_context, e.old_chunk, e.new_chunk);
 474                 if (r)
 475                         return r;
 476         }
 477
 478         return 0;
 479 }
 480
 481 static int read_exceptions(struct pstore *ps,
 482                            int (*callback)(void *callback_context, chunk_t old,
 483                                            chunk_t new),
 484                            void *callback_context)
 485 {
 486         int r, full = 1;
 487
 488         /*
 489          * Keeping reading chunks and inserting exceptions until
 490          * we find a partially full area.
 491          */
 492         for (ps->current_area = 0; full; ps->current_area++) {
 493                 r = area_io(ps, READ);
 494                 if (r)
 495                         return r;
 496
 497                 r = insert_exceptions(ps, callback, callback_context, &full);
 498                 if (r)
 499                         return r;
 500         }
 501
 502         ps->current_area--;
 503
 504         return 0;
 505 }
 506
 507 static struct pstore *get_info(struct dm_exception_store *store)
 508 {
 509         return (struct pstore *) store->context;
 510 }
 511
 512 static void persistent_usage(struct dm_exception_store *store,
 513                              sector_t *total_sectors,
 514                              sector_t *sectors_allocated,
 515                              sector_t *metadata_sectors)
 516 {
 517         struct pstore *ps = get_info(store);
 518
 519         *sectors_allocated = ps->next_free * store->chunk_size;
 520         *total_sectors = get_dev_size(dm_snap_cow(store->snap)->bdev);
 521
 522         /*
 523          * First chunk is the fixed header.
 524          * Then there are (ps->current_area + 1) metadata chunks, each one
 525          * separated from the next by ps->exceptions_per_area data chunks.
 526          */
 527         *metadata_sectors = (ps->current_area + 1 + NUM_SNAPSHOT_HDR_CHUNKS) *
 528                             store->chunk_size;
 529 }
 530
 531 static void persistent_dtr(struct dm_exception_store *store)
 532 {
 533         struct pstore *ps = get_info(store);
 534
 535         destroy_workqueue(ps->metadata_wq);
 536
 537         /* Created in read_header */
 538         if (ps->io_client)
 539                 dm_io_client_destroy(ps->io_client);
 540         free_area(ps);
 541
 542         /* Allocated in persistent_read_metadata */
 543         if (ps->callbacks)
 544                 vfree(ps->callbacks);
 545
 546         kfree(ps);
 547 }
 548
 549 static int persistent_read_metadata(struct dm_exception_store *store,
 550                                     int (*callback)(void *callback_context,
 551                                                     chunk_t old, chunk_t new),
 552                                     void *callback_context)
 553 {
 554         int r, uninitialized_var(new_snapshot);
 555         struct pstore *ps = get_info(store);
 556
 557         /*
 558          * Read the snapshot header.
 559          */
 560         r = read_header(ps, &new_snapshot);
 561         if (r)
 562                 return r;
 563
 564         /*
 565          * Now we know correct chunk_size, complete the initialisation.
 566          */
 567         ps->exceptions_per_area = (ps->store->chunk_size << SECTOR_SHIFT) /
 568                                   sizeof(struct disk_exception);
 569         ps->callbacks = dm_vcalloc(ps->exceptions_per_area,
 570                                    sizeof(*ps->callbacks));
 571         if (!ps->callbacks)
 572                 return -ENOMEM;
 573
 574         /*
 575          * Do we need to setup a new snapshot ?
 576          */
 577         if (new_snapshot) {
 578                 r = write_header(ps);
 579                 if (r) {
 580                         DMWARN("write_header failed");
 581                         return r;
 582                 }
 583
 584                 ps->current_area = 0;
 585                 zero_memory_area(ps);
 586                 r = zero_disk_area(ps, 0);
 587                 if (r)
 588                         DMWARN("zero_disk_area(0) failed");
 589                 return r;
 590         }
 591         /*
 592          * Sanity checks.
 593          */
 594         if (ps->version != SNAPSHOT_DISK_VERSION) {
 595                 DMWARN("unable to handle snapshot disk version %d",
 596                        ps->version);
 597                 return -EINVAL;
 598         }
 599
 600         /*
 601          * Metadata are valid, but snapshot is invalidated
 602          */
 603         if (!ps->valid)
 604                 return 1;
 605
 606         /*
 607          * Read the metadata.
 608          */
 609         r = read_exceptions(ps, callback, callback_context);
 610
 611         return r;
 612 }
 613
 614 static int persistent_prepare_exception(struct dm_exception_store *store,
 615                                         struct dm_exception *e)
 616 {
 617         struct pstore *ps = get_info(store);
 618         uint32_t stride;
 619         chunk_t next_free;
 620         sector_t size = get_dev_size(dm_snap_cow(store->snap)->bdev);
 621
 622         /* Is there enough room ? */
 623         if (size < ((ps->next_free + 1) * store->chunk_size))
 624                 return -ENOSPC;
 625
 626         e->new_chunk = ps->next_free;
 627
 628         /*
 629          * Move onto the next free pending, making sure to take
 630          * into account the location of the metadata chunks.
 631          */
 632         stride = (ps->exceptions_per_area + 1);
 633         next_free = ++ps->next_free;
 634         if (sector_div(next_free, stride) == 1)
 635                 ps->next_free++;
 636
 637         atomic_inc(&ps->pending_count);
 638         return 0;
 639 }
 640
 641 static void persistent_commit_exception(struct dm_exception_store *store,
 642                                         struct dm_exception *e,
 643                                         void (*callback) (void *, int success),
 644                                         void *callback_context)
 645 {
 646         unsigned int i;
 647         struct pstore *ps = get_info(store);
 648         struct core_exception ce;
 649         struct commit_callback *cb;
 650
 651         ce.old_chunk = e->old_chunk;
 652         ce.new_chunk = e->new_chunk;
 653         write_exception(ps, ps->current_committed++, &ce);
 654
 655         /*
 656          * Add the callback to the back of the array.  This code
 657          * is the only place where the callback array is
 658          * manipulated, and we know that it will never be called
 659          * multiple times concurrently.
 660          */
 661         cb = ps->callbacks + ps->callback_count++;
 662         cb->callback = callback;
 663         cb->context = callback_context;
 664
 665         /*
 666          * If there are exceptions in flight and we have not yet
 667          * filled this metadata area there's nothing more to do.
 668          */
 669         if (!atomic_dec_and_test(&ps->pending_count) &&
 670             (ps->current_committed != ps->exceptions_per_area))
 671                 return;
 672
 673         /*
 674          * If we completely filled the current area, then wipe the next one.
 675          */
 676         if ((ps->current_committed == ps->exceptions_per_area) &&
 677             zero_disk_area(ps, ps->current_area + 1))
 678                 ps->valid = 0;
 679
 680         /*
 681          * Commit exceptions to disk.
 682          */
 683         if (ps->valid && area_io(ps, WRITE_FLUSH_FUA))
 684                 ps->valid = 0;
 685
 686         /*
 687          * Advance to the next area if this one is full.
 688          */
 689         if (ps->current_committed == ps->exceptions_per_area) {
 690                 ps->current_committed = 0;
 691                 ps->current_area++;
 692                 zero_memory_area(ps);
 693         }
 694
 695         for (i = 0; i < ps->callback_count; i++) {
 696                 cb = ps->callbacks + i;
 697                 cb->callback(cb->context, ps->valid);
 698         }
 699
 700         ps->callback_count = 0;
 701 }
 702
 703 static int persistent_prepare_merge(struct dm_exception_store *store,
 704                                     chunk_t *last_old_chunk,
 705                                     chunk_t *last_new_chunk)
 706 {
 707         struct pstore *ps = get_info(store);
 708         struct core_exception ce;
 709         int nr_consecutive;
 710         int r;
 711
 712         /*
 713          * When current area is empty, move back to preceding area.
 714          */
 715         if (!ps->current_committed) {
 716                 /*
 717                  * Have we finished?
 718                  */
 719                 if (!ps->current_area)
 720                         return 0;
 721
 722                 ps->current_area--;
 723                 r = area_io(ps, READ);
 724                 if (r < 0)
 725                         return r;
 726                 ps->current_committed = ps->exceptions_per_area;
 727         }
 728
 729         read_exception(ps, ps->current_committed - 1, &ce);
 730         *last_old_chunk = ce.old_chunk;
 731         *last_new_chunk = ce.new_chunk;
 732
 733         /*
 734          * Find number of consecutive chunks within the current area,
 735          * working backwards.
 736          */
 737         for (nr_consecutive = 1; nr_consecutive < ps->current_committed;
 738              nr_consecutive++) {
 739                 read_exception(ps, ps->current_committed - 1 - nr_consecutive,
 740                                &ce);
 741                 if (ce.old_chunk != *last_old_chunk - nr_consecutive ||
 742                     ce.new_chunk != *last_new_chunk - nr_consecutive)
 743                         break;
 744         }
 745
 746         return nr_consecutive;
 747 }
 748
 749 static int persistent_commit_merge(struct dm_exception_store *store,
 750                                    int nr_merged)
 751 {
 752         int r, i;
 753         struct pstore *ps = get_info(store);
 754
 755         BUG_ON(nr_merged > ps->current_committed);
 756
 757         for (i = 0; i < nr_merged; i++)
 758                 clear_exception(ps, ps->current_committed - 1 - i);
 759
 760         r = area_io(ps, WRITE_FLUSH_FUA);
 761         if (r < 0)
 762                 return r;
 763
 764         ps->current_committed -= nr_merged;
 765
 766         /*
 767          * At this stage, only persistent_usage() uses ps->next_free, so
 768          * we make no attempt to keep ps->next_free strictly accurate
 769          * as exceptions may have been committed out-of-order originally.
 770          * Once a snapshot has become merging, we set it to the value it
 771          * would have held had all the exceptions been committed in order.
 772          *
 773          * ps->current_area does not get reduced by prepare_merge() until
 774          * after commit_merge() has removed the nr_merged previous exceptions.
 775          */
 776         ps->next_free = area_location(ps, ps->current_area) +
 777                         ps->current_committed + 1;
 778
 779         return 0;
 780 }
 781
 782 static void persistent_drop_snapshot(struct dm_exception_store *store)
 783 {
 784         struct pstore *ps = get_info(store);
 785
 786         ps->valid = 0;
 787         if (write_header(ps))
 788                 DMWARN("write header failed");
 789 }
 790
 791 static int persistent_ctr(struct dm_exception_store *store,
 792                           unsigned argc, char **argv)
 793 {
 794         struct pstore *ps;
 795
 796         /* allocate the pstore */
 797         ps = kzalloc(sizeof(*ps), GFP_KERNEL);
 798         if (!ps)
 799                 return -ENOMEM;
 800
 801         ps->store = store;
 802         ps->valid = 1;
 803         ps->version = SNAPSHOT_DISK_VERSION;
 804         ps->area = NULL;
 805         ps->zero_area = NULL;
 806         ps->header_area = NULL;
 807         ps->next_free = NUM_SNAPSHOT_HDR_CHUNKS + 1; /* header and 1st area */
 808         ps->current_committed = 0;
 809
 810         ps->callback_count = 0;
 811         atomic_set(&ps->pending_count, 0);
 812         ps->callbacks = NULL;
 813
 814         ps->metadata_wq = alloc_workqueue("ksnaphd", WQ_MEM_RECLAIM, 0);
 815         if (!ps->metadata_wq) {
 816                 kfree(ps);
 817                 DMERR("couldn't start header metadata update thread");
 818                 return -ENOMEM;
 819         }
 820
 821         store->context = ps;
 822
 823         return 0;
 824 }
 825
 826 static unsigned persistent_status(struct dm_exception_store *store,
 827                                   status_type_t status, char *result,
 828                                   unsigned maxlen)
 829 {
 830         unsigned sz = 0;
 831
 832         switch (status) {
 833         case STATUSTYPE_INFO:
 834                 break;
 835         case STATUSTYPE_TABLE:
 836                 DMEMIT(" P %llu", (unsigned long long)store->chunk_size);
 837         }
 838
 839         return sz;
 840 }
 841
 842 static struct dm_exception_store_type _persistent_type = {
 843         .name = "persistent",
 844         .module = THIS_MODULE,
 845         .ctr = persistent_ctr,
 846         .dtr = persistent_dtr,
 847         .read_metadata = persistent_read_metadata,
 848         .prepare_exception = persistent_prepare_exception,
 849         .commit_exception = persistent_commit_exception,
 850         .prepare_merge = persistent_prepare_merge,
 851         .commit_merge = persistent_commit_merge,
 852         .drop_snapshot = persistent_drop_snapshot,
 853         .usage = persistent_usage,
 854         .status = persistent_status,
 855 };
 856
 857 static struct dm_exception_store_type _persistent_compat_type = {
 858         .name = "P",
 859         .module = THIS_MODULE,
 860         .ctr = persistent_ctr,
 861         .dtr = persistent_dtr,
 862         .read_metadata = persistent_read_metadata,
 863         .prepare_exception = persistent_prepare_exception,
 864         .commit_exception = persistent_commit_exception,
 865         .prepare_merge = persistent_prepare_merge,
 866         .commit_merge = persistent_commit_merge,
 867         .drop_snapshot = persistent_drop_snapshot,
 868         .usage = persistent_usage,
 869         .status = persistent_status,
 870 };
 871
 872 int dm_persistent_snapshot_init(void)
 873 {
 874         int r;
 875
 876         r = dm_exception_store_type_register(&_persistent_type);
 877         if (r) {
 878                 DMERR("Unable to register persistent exception store type");
 879                 return r;
 880         }
 881
 882         r = dm_exception_store_type_register(&_persistent_compat_type);
 883         if (r) {
 884                 DMERR("Unable to register old-style persistent exception "
 885                       "store type");
 886                 dm_exception_store_type_unregister(&_persistent_type);
 887                 return r;
 888         }
 889
 890         return r;
 891 }
 892
 893 void dm_persistent_snapshot_exit(void)
 894 {
 895         dm_exception_store_type_unregister(&_persistent_type);
 896         dm_exception_store_type_unregister(&_persistent_compat_type);
 897 }