/*
 * Copyright (C) 2002 Sistina Software (UK) Limited.
 *
 * This file is released under the GPL.
 *
 * Kcopyd provides a simple interface for copying an area of one
 * block-device to one or more other block-devices, with an asynchronous
 * completion notification.
 */
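
/*
 * Example usage (an illustrative sketch only; it assumes the
 * declarations in kcopyd.h and dm-io.h, that src_bdev/dst_bdev are
 * block devices the caller already holds references to, and it
 * elides all error checking):
 *
 *	static void copy_done(int read_err, unsigned int write_err,
 *			      void *context)
 *	{
 *		... both error arguments are zero on success ...
 *	}
 *
 *	struct kcopyd_client *kc;
 *	struct io_region from, to;
 *
 *	kcopyd_client_create(64, &kc);	  -- reserve a 64-page pool
 *
 *	from.bdev = src_bdev;
 *	from.sector = 0;
 *	from.count = 1024;		  -- in 512-byte sectors
 *
 *	to = from;
 *	to.bdev = dst_bdev;
 *
 *	kcopyd_copy(kc, &from, 1, &to, 0, copy_done, NULL);
 *	...
 *	kcopyd_client_destroy(kc);	  -- waits for outstanding jobs
 */
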
#include <asm/types.h>
#include <asm/atomic.h>

#include <linux/blkdev.h>
#include <linux/config.h>
#include <linux/fs.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/mempool.h>
#include <linux/module.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/workqueue.h>
#include <linux/mutex.h>

#include "kcopyd.h"

static struct workqueue_struct *_kcopyd_wq;
static struct work_struct _kcopyd_work;

static inline void wake(void)
{
	queue_work(_kcopyd_wq, &_kcopyd_work);
}
/*-----------------------------------------------------------------
 * Each kcopyd client has its own little pool of preallocated
 * pages for kcopyd io.
 *---------------------------------------------------------------*/
struct kcopyd_client {
	struct list_head list;

	spinlock_t lock;
	struct page_list *pages;
	unsigned int nr_pages;
	unsigned int nr_free_pages;

	wait_queue_head_t destroyq;
	atomic_t nr_jobs;
};
static struct page_list *alloc_pl(void)
{
	struct page_list *pl;

	pl = kmalloc(sizeof(*pl), GFP_KERNEL);
	if (!pl)
		return NULL;

	pl->page = alloc_page(GFP_KERNEL);
	if (!pl->page) {
		kfree(pl);
		return NULL;
	}

	return pl;
}
static void free_pl(struct page_list *pl)
{
	__free_page(pl->page);
	kfree(pl);
}
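
/*
 * Take 'nr' pages off the head of the client's free list.  Fails with
 * -ENOMEM rather than blocking when the preallocated reservation is
 * exhausted; the caller retries later (see run_pages_job() below).
 */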
static int kcopyd_get_pages(struct kcopyd_client *kc,
			    unsigned int nr, struct page_list **pages)
{
	struct page_list *pl;

	spin_lock(&kc->lock);
	if (kc->nr_free_pages < nr) {
		spin_unlock(&kc->lock);
		return -ENOMEM;
	}

	kc->nr_free_pages -= nr;
	for (*pages = pl = kc->pages; --nr; pl = pl->next)
		;	/* advance pl to the last of the nr pages handed out */

	kc->pages = pl->next;
	pl->next = NULL;

	spin_unlock(&kc->lock);

	return 0;
}
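
/*
 * Give a chain of pages back to the client: walk the chain to count
 * the pages back into nr_free_pages, then splice the whole chain onto
 * the head of kc->pages.
 */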
static void kcopyd_put_pages(struct kcopyd_client *kc, struct page_list *pl)
{
	struct page_list *cursor;

	spin_lock(&kc->lock);
	for (cursor = pl; cursor->next; cursor = cursor->next)
		kc->nr_free_pages++;

	kc->nr_free_pages++;
	cursor->next = kc->pages;
	kc->pages = pl;
	spin_unlock(&kc->lock);
}
/*
 * These three functions resize the page pool.
 */
static void drop_pages(struct page_list *pl)
{
	struct page_list *next;

	while (pl) {
		next = pl->next;
		free_pl(pl);
		pl = next;
	}
}

static int client_alloc_pages(struct kcopyd_client *kc, unsigned int nr)
{
	unsigned int i;
	struct page_list *pl = NULL, *next;

	for (i = 0; i < nr; i++) {
		next = alloc_pl();
		if (!next) {
			if (pl)
				drop_pages(pl);
			return -ENOMEM;
		}
		next->next = pl;
		pl = next;
	}

	kcopyd_put_pages(kc, pl);
	kc->nr_pages += nr;
	return 0;
}

static void client_free_pages(struct kcopyd_client *kc)
{
	BUG_ON(kc->nr_free_pages != kc->nr_pages);
	drop_pages(kc->pages);
	kc->pages = NULL;
	kc->nr_free_pages = kc->nr_pages = 0;
}
/*-----------------------------------------------------------------
 * kcopyd_jobs need to be allocated by the *clients* of kcopyd;
 * for this reason we use a mempool to prevent the client from
 * ever having to do io (which could cause a deadlock).
 *---------------------------------------------------------------*/
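
/*
 * A sketch of the deadlock the mempool avoids: under memory pressure
 * the VM may have to write dirty pages through a device-mapper target
 * that itself issues kcopyd jobs.  If submitting a job required a
 * fresh kmalloc(), that allocation could wait on the very writeback
 * it is needed to service.  Allocating jobs from a mempool with a
 * reserve of MIN_JOBS objects keeps the submission path from doing io.
 */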
struct kcopyd_job {
	struct kcopyd_client *kc;
	struct list_head list;
	unsigned long flags;

	/*
	 * Error state of the job.
	 */
	int read_err;
	unsigned int write_err;

	/*
	 * Either READ or WRITE
	 */
	int rw;
	struct io_region source;

	/*
	 * The destinations for the transfer.
	 */
	unsigned int num_dests;
	struct io_region dests[KCOPYD_MAX_REGIONS];

	sector_t offset;
	unsigned int nr_pages;
	struct page_list *pages;

	/*
	 * Set this to ensure you are notified when the job has
	 * completed.  'context' is for callback to use.
	 */
	kcopyd_notify_fn fn;
	void *context;

	/*
	 * These fields are only used if the job has been split
	 * into more manageable parts.
	 */
	struct semaphore lock;
	atomic_t sub_jobs;
	sector_t progress;
};

/* FIXME: this should scale with the number of pages */
#define MIN_JOBS 512

static kmem_cache_t *_job_cache;
static mempool_t *_job_pool;
/*
 * We maintain three lists of jobs:
 *
 * i)   jobs waiting for pages
 * ii)  jobs that have pages, and are waiting for the io to be issued.
 * iii) jobs that have completed.
 *
 * All three of these are protected by job_lock.
 */
static DEFINE_SPINLOCK(_job_lock);

static LIST_HEAD(_complete_jobs);
static LIST_HEAD(_io_jobs);
static LIST_HEAD(_pages_jobs);
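
/*
 * Lifecycle sketch, derived from the handlers below: dispatch_job()
 * pushes a new job onto _pages_jobs; run_pages_job() reserves pages
 * and moves it to _io_jobs; run_io_job() issues the READ; complete_io()
 * re-queues it on _io_jobs as a WRITE; and the finished WRITE lands on
 * _complete_jobs, where run_complete_job() fires the client callback.
 */
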
static int jobs_init(void)
{
	_job_cache = kmem_cache_create("kcopyd-jobs",
				       sizeof(struct kcopyd_job),
				       __alignof__(struct kcopyd_job),
				       0, NULL, NULL);
	if (!_job_cache)
		return -ENOMEM;

	_job_pool = mempool_create_slab_pool(MIN_JOBS, _job_cache);
	if (!_job_pool) {
		kmem_cache_destroy(_job_cache);
		return -ENOMEM;
	}

	return 0;
}

static void jobs_exit(void)
{
	BUG_ON(!list_empty(&_complete_jobs));
	BUG_ON(!list_empty(&_io_jobs));
	BUG_ON(!list_empty(&_pages_jobs));

	mempool_destroy(_job_pool);
	kmem_cache_destroy(_job_cache);
	_job_pool = NULL;
	_job_cache = NULL;
}
/*
 * Functions to push a job onto the tail of a given job list, and to
 * pop one off the head.
 */
static inline struct kcopyd_job *pop(struct list_head *jobs)
{
	struct kcopyd_job *job = NULL;
	unsigned long flags;

	spin_lock_irqsave(&_job_lock, flags);

	if (!list_empty(jobs)) {
		job = list_entry(jobs->next, struct kcopyd_job, list);
		list_del(&job->list);
	}
	spin_unlock_irqrestore(&_job_lock, flags);

	return job;
}

static inline void push(struct list_head *jobs, struct kcopyd_job *job)
{
	unsigned long flags;

	spin_lock_irqsave(&_job_lock, flags);
	list_add_tail(&job->list, jobs);
	spin_unlock_irqrestore(&_job_lock, flags);
}
/*
 * These three functions process 1 item from the corresponding
 * job list.
 *
 * They return:
 * < 0: error
 *   0: success
 * > 0: can't process yet.
 */
static int run_complete_job(struct kcopyd_job *job)
{
	void *context = job->context;
	int read_err = job->read_err;
	unsigned int write_err = job->write_err;
	kcopyd_notify_fn fn = job->fn;
	struct kcopyd_client *kc = job->kc;

	kcopyd_put_pages(kc, job->pages);
	mempool_free(job, _job_pool);
	fn(read_err, write_err, context);

	if (atomic_dec_and_test(&kc->nr_jobs))
		wake_up(&kc->destroyq);

	return 0;
}
static void complete_io(unsigned long error, void *context)
{
	struct kcopyd_job *job = (struct kcopyd_job *) context;

	if (error) {
		if (job->rw == WRITE)
			/* accumulate error bits; &= here would always leave 0 */
			job->write_err |= error;
		else
			job->read_err = 1;

		if (!test_bit(KCOPYD_IGNORE_ERROR, &job->flags)) {
			push(&_complete_jobs, job);
			wake();
			return;
		}
	}

	if (job->rw == WRITE)
		push(&_complete_jobs, job);

	else {
		job->rw = WRITE;
		push(&_io_jobs, job);
	}

	wake();
}
/*
 * Request io for a particular job: a READ is issued against the
 * single source region; once it completes the same pages are
 * written out to every destination region.
 */
static int run_io_job(struct kcopyd_job *job)
{
	int r;

	if (job->rw == READ)
		r = dm_io_async(1, &job->source, job->rw,
				job->pages,
				job->offset, complete_io, job);

	else
		r = dm_io_async(job->num_dests, job->dests, job->rw,
				job->pages,
				job->offset, complete_io, job);

	return r;
}
static int run_pages_job(struct kcopyd_job *job)
{
	int r;

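	/*
	 * Both dests[0].count and offset are in 512-byte sectors, and
	 * PAGE_SIZE >> 9 is the number of sectors per page, so this
	 * rounds the transfer up to a whole number of pages.
	 */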
	job->nr_pages = dm_div_up(job->dests[0].count + job->offset,
				  PAGE_SIZE >> 9);
	r = kcopyd_get_pages(job->kc, job->nr_pages, &job->pages);
	if (!r) {
		/* this job is ready for io */
		push(&_io_jobs, job);
		return 0;
	}

	if (r == -ENOMEM)
		/* can't complete now */
		return 1;

	return r;
}
/*
 * Run through a list for as long as possible.  Returns the count
 * of successful jobs.
 */
static int process_jobs(struct list_head *jobs, int (*fn) (struct kcopyd_job *))
{
	struct kcopyd_job *job;
	int r, count = 0;

	while ((job = pop(jobs))) {

		r = fn(job);

		if (r < 0) {
			/* error this rogue job */
			if (job->rw == WRITE)
				job->write_err = (unsigned int) -1;
			else
				job->read_err = 1;
			push(&_complete_jobs, job);
			break;
		}

		if (r > 0) {
			/*
			 * We couldn't service this job ATM, so
			 * push this job back onto the list.
			 */
			push(jobs, job);
			break;
		}

		count++;
	}

	return count;
}
/*
 * kcopyd does this every time it's woken up.
 */
static void do_work(void *ignored)
{
	/*
	 * The order that these are called is *very* important.
	 * complete jobs can free some pages for pages jobs.
	 * Pages jobs when successful will jump onto the io jobs
	 * list.  io jobs call wake when they complete and it all
	 * starts again.
	 */
	process_jobs(&_complete_jobs, run_complete_job);
	process_jobs(&_pages_jobs, run_pages_job);
	process_jobs(&_io_jobs, run_io_job);
}
/*
 * If we are copying a small region we just dispatch a single job
 * to do the copy, otherwise the io has to be split up into many
 * jobs.
 */
static void dispatch_job(struct kcopyd_job *job)
{
	atomic_inc(&job->kc->nr_jobs);
	push(&_pages_jobs, job);
	wake();
}
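
/*
 * Sub-jobs copy SUB_JOB_SIZE sectors at a time; with 512-byte sectors
 * that is 64KB per sub-job.
 */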
#define SUB_JOB_SIZE 128
static void segment_complete(int read_err,
			     unsigned int write_err, void *context)
{
	/* FIXME: tidy this function */
	sector_t progress = 0;
	sector_t count = 0;
	struct kcopyd_job *job = (struct kcopyd_job *) context;

	down(&job->lock);

	/* update the error */
	if (read_err)
		job->read_err = 1;

	if (write_err)
		/* accumulate error bits; &= here would discard the error */
		job->write_err |= write_err;

	/*
	 * Only dispatch more work if there hasn't been an error.
	 */
	if ((!job->read_err && !job->write_err) ||
	    test_bit(KCOPYD_IGNORE_ERROR, &job->flags)) {
		/* get the next chunk of work */
		progress = job->progress;
		count = job->source.count - progress;
		if (count) {
			if (count > SUB_JOB_SIZE)
				count = SUB_JOB_SIZE;

			job->progress += count;
		}
	}
	up(&job->lock);

	if (count) {
		int i;
		struct kcopyd_job *sub_job = mempool_alloc(_job_pool, GFP_NOIO);

		*sub_job = *job;
		sub_job->source.sector += progress;
		sub_job->source.count = count;

		for (i = 0; i < job->num_dests; i++) {
			sub_job->dests[i].sector += progress;
			sub_job->dests[i].count = count;
		}

		sub_job->fn = segment_complete;
		sub_job->context = job;
		dispatch_job(sub_job);

	} else if (atomic_dec_and_test(&job->sub_jobs)) {

		/*
		 * To avoid a race we must keep the job around
		 * until after the notify function has completed.
		 * Otherwise the client may try and stop the job
		 * after we've completed.
		 */
		job->fn(read_err, write_err, job->context);
		mempool_free(job, _job_pool);
	}
}
/*
 * Create some little jobs that between them will copy the whole
 * region.
 */
#define SPLIT_COUNT 8
static void split_job(struct kcopyd_job *job)
{
	int i;

	atomic_set(&job->sub_jobs, SPLIT_COUNT);
	for (i = 0; i < SPLIT_COUNT; i++)
		segment_complete(0, 0u, job);
}
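
/*
 * Worked example (numbers are illustrative): a 10240-sector copy is
 * started as SPLIT_COUNT (8) concurrent streams.  Each invocation of
 * segment_complete() claims the next SUB_JOB_SIZE (128-sector) chunk
 * of job->progress and dispatches a sub-job for it, so the 80 chunks
 * proceed 8 at a time; the last stream to run out of work frees the
 * parent job and calls the client's notify function.
 */
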
int kcopyd_copy(struct kcopyd_client *kc, struct io_region *from,
		unsigned int num_dests, struct io_region *dests,
		unsigned int flags, kcopyd_notify_fn fn, void *context)
{
	struct kcopyd_job *job;

	/*
	 * Allocate a new job.
	 */
	job = mempool_alloc(_job_pool, GFP_NOIO);

	/*
	 * set up for the read.
	 */
	job->kc = kc;
	job->flags = flags;
	job->read_err = 0;
	job->write_err = 0;
	job->rw = READ;

	job->source = *from;

	job->num_dests = num_dests;
	memcpy(&job->dests, dests, sizeof(*dests) * num_dests);

	job->offset = 0;
	job->nr_pages = 0;
	job->pages = NULL;

	job->fn = fn;
	job->context = context;

	if (job->source.count < SUB_JOB_SIZE)
		dispatch_job(job);

	else {
		init_MUTEX(&job->lock);
		job->progress = 0;
		split_job(job);
	}

	return 0;
}
/*
 * Cancels a kcopyd job, e.g. someone might be deactivating a
 * mirror.
 */
#if 0
int kcopyd_cancel(struct kcopyd_job *job, int block)
{
	/* FIXME: finish */
	return -1;
}
#endif /* 0 */
/*-----------------------------------------------------------------
 * Unit setup
 *---------------------------------------------------------------*/
static DEFINE_MUTEX(_client_lock);
static LIST_HEAD(_clients);

static void client_add(struct kcopyd_client *kc)
{
	mutex_lock(&_client_lock);
	list_add(&kc->list, &_clients);
	mutex_unlock(&_client_lock);
}

static void client_del(struct kcopyd_client *kc)
{
	mutex_lock(&_client_lock);
	list_del(&kc->list);
	mutex_unlock(&_client_lock);
}
static DEFINE_MUTEX(kcopyd_init_lock);
static int kcopyd_clients = 0;

static int kcopyd_init(void)
{
	int r;

	mutex_lock(&kcopyd_init_lock);

	if (kcopyd_clients) {
		/* Already initialized. */
		kcopyd_clients++;
		mutex_unlock(&kcopyd_init_lock);
		return 0;
	}

	r = jobs_init();
	if (r) {
		mutex_unlock(&kcopyd_init_lock);
		return r;
	}

	_kcopyd_wq = create_singlethread_workqueue("kcopyd");
	if (!_kcopyd_wq) {
		jobs_exit();
		mutex_unlock(&kcopyd_init_lock);
		return -ENOMEM;
	}

	kcopyd_clients++;
	INIT_WORK(&_kcopyd_work, do_work, NULL);
	mutex_unlock(&kcopyd_init_lock);
	return 0;
}
static void kcopyd_exit(void)
{
	mutex_lock(&kcopyd_init_lock);
	kcopyd_clients--;
	if (!kcopyd_clients) {
		jobs_exit();
		destroy_workqueue(_kcopyd_wq);
		_kcopyd_wq = NULL;
	}
	mutex_unlock(&kcopyd_init_lock);
}
int kcopyd_client_create(unsigned int nr_pages, struct kcopyd_client **result)
{
	int r = 0;
	struct kcopyd_client *kc;

	r = kcopyd_init();
	if (r)
		return r;

	kc = kmalloc(sizeof(*kc), GFP_KERNEL);
	if (!kc) {
		kcopyd_exit();
		return -ENOMEM;
	}

	spin_lock_init(&kc->lock);
	kc->pages = NULL;
	kc->nr_pages = kc->nr_free_pages = 0;
	r = client_alloc_pages(kc, nr_pages);
	if (r) {
		kfree(kc);
		kcopyd_exit();
		return r;
	}

	r = dm_io_get(nr_pages);
	if (r) {
		client_free_pages(kc);
		kfree(kc);
		kcopyd_exit();
		return r;
	}

	init_waitqueue_head(&kc->destroyq);
	atomic_set(&kc->nr_jobs, 0);

	client_add(kc);
	*result = kc;
	return 0;
}
void kcopyd_client_destroy(struct kcopyd_client *kc)
{
	/* Wait for completion of all jobs submitted by this client. */
	wait_event(kc->destroyq, !atomic_read(&kc->nr_jobs));

	dm_io_put(kc->nr_pages);
	client_free_pages(kc);
	client_del(kc);
	kfree(kc);
	kcopyd_exit();
}

EXPORT_SYMBOL(kcopyd_client_create);
EXPORT_SYMBOL(kcopyd_client_destroy);
EXPORT_SYMBOL(kcopyd_copy);