block/commit.c

   1 /*
   2  * Live block commit
   3  *
   4  * Copyright Red Hat, Inc. 2012
   5  *
   6  * Authors:
   7  *  Jeff Cody   <jcody@redhat.com>
   8  *  Based on stream.c by Stefan Hajnoczi
   9  *
  10  * This work is licensed under the terms of the GNU LGPL, version 2 or later.
  11  * See the COPYING.LIB file in the top-level directory.
  12  *
  13  */
  14
  15 #include "qemu/osdep.h"
  16 #include "qemu/cutils.h"
  17 #include "trace.h"
  18 #include "block/block_int.h"
  19 #include "block/blockjob_int.h"
  20 #include "qapi/error.h"
  21 #include "qapi/qmp/qerror.h"
  22 #include "qemu/ratelimit.h"
  23 #include "sysemu/block-backend.h"
  24
  25 enum {
  26     /*
  27      * Size of data buffer for populating the image file.  This should be large
  28      * enough to process multiple clusters in a single call, so that populating
  29      * contiguous regions of the image is efficient.
  30      */
  31     COMMIT_BUFFER_SIZE = 512 * 1024, /* in bytes */
  32 };
  33
  34 typedef struct CommitBlockJob {
  35     BlockJob common;
  36     BlockDriverState *commit_top_bs;
  37     BlockBackend *top;
  38     BlockBackend *base;
  39     BlockDriverState *base_bs;
  40     BlockdevOnError on_error;
  41     bool base_read_only;
  42     bool chain_frozen;
  43     char *backing_file_str;
  44 } CommitBlockJob;
  45
  46 static int coroutine_fn commit_populate(BlockBackend *bs, BlockBackend *base,
  47                                         int64_t offset, uint64_t bytes,
  48                                         void *buf)
  49 {
  50     int ret = 0;
  51
  52     assert(bytes < SIZE_MAX);
  53
  54     ret = blk_co_pread(bs, offset, bytes, buf, 0);
  55     if (ret < 0) {
  56         return ret;
  57     }
  58
  59     ret = blk_co_pwrite(base, offset, bytes, buf, 0);
  60     if (ret < 0) {
  61         return ret;
  62     }
  63
  64     return 0;
  65 }
  66
  67 static int commit_prepare(Job *job)
  68 {
  69     CommitBlockJob *s = container_of(job, CommitBlockJob, common.job);
  70
  71     bdrv_unfreeze_backing_chain(s->commit_top_bs, s->base_bs);
  72     s->chain_frozen = false;
  73
  74     /* Remove base node parent that still uses BLK_PERM_WRITE/RESIZE before
  75      * the normal backing chain can be restored. */
  76     blk_unref(s->base);
  77     s->base = NULL;
  78
  79     /* FIXME: bdrv_drop_intermediate treats total failures and partial failures
  80      * identically. Further work is needed to disambiguate these cases. */
  81     return bdrv_drop_intermediate(s->commit_top_bs, s->base_bs,
  82                                   s->backing_file_str);
  83 }
  84
  85 static void commit_abort(Job *job)
  86 {
  87     CommitBlockJob *s = container_of(job, CommitBlockJob, common.job);
  88     BlockDriverState *top_bs = blk_bs(s->top);
  89
  90     if (s->chain_frozen) {
  91         bdrv_unfreeze_backing_chain(s->commit_top_bs, s->base_bs);
  92     }
  93
  94     /* Make sure commit_top_bs and top stay around until bdrv_replace_node() */
  95     bdrv_ref(top_bs);
  96     bdrv_ref(s->commit_top_bs);
  97
  98     if (s->base) {
  99         blk_unref(s->base);
 100     }
 101
 102     /* free the blockers on the intermediate nodes so that bdrv_replace_nodes
 103      * can succeed */
 104     block_job_remove_all_bdrv(&s->common);
 105
 106     /* If bdrv_drop_intermediate() failed (or was not invoked), remove the
 107      * commit filter driver from the backing chain now. Do this as the final
 108      * step so that the 'consistent read' permission can be granted.
 109      *
 110      * XXX Can (or should) we somehow keep 'consistent read' blocked even
 111      * after the failed/cancelled commit job is gone? If we already wrote
 112      * something to base, the intermediate images aren't valid any more. */
 113     bdrv_replace_node(s->commit_top_bs, backing_bs(s->commit_top_bs),
 114                       &error_abort);
 115
 116     bdrv_unref(s->commit_top_bs);
 117     bdrv_unref(top_bs);
 118 }
 119
 120 static void commit_clean(Job *job)
 121 {
 122     CommitBlockJob *s = container_of(job, CommitBlockJob, common.job);
 123
 124     /* restore base open flags here if appropriate (e.g., change the base back
 125      * to r/o). These reopens do not need to be atomic, since we won't abort
 126      * even on failure here */
 127     if (s->base_read_only) {
 128         bdrv_reopen_set_read_only(s->base_bs, true, NULL);
 129     }
 130
 131     g_free(s->backing_file_str);
 132     blk_unref(s->top);
 133 }
 134
 135 static int coroutine_fn commit_run(Job *job, Error **errp)
 136 {
 137     CommitBlockJob *s = container_of(job, CommitBlockJob, common.job);
 138     int64_t offset;
 139     uint64_t delay_ns = 0;
 140     int ret = 0;
 141     int64_t n = 0; /* bytes */
 142     void *buf = NULL;
 143     int bytes_written = 0;
 144     int64_t len, base_len;
 145
 146     ret = len = blk_getlength(s->top);
 147     if (len < 0) {
 148         goto out;
 149     }
 150     job_progress_set_remaining(&s->common.job, len);
 151
 152     ret = base_len = blk_getlength(s->base);
 153     if (base_len < 0) {
 154         goto out;
 155     }
 156
 157     if (base_len < len) {
 158         ret = blk_truncate(s->base, len, PREALLOC_MODE_OFF, NULL);
 159         if (ret) {
 160             goto out;
 161         }
 162     }
 163
 164     buf = blk_blockalign(s->top, COMMIT_BUFFER_SIZE);
 165
 166     for (offset = 0; offset < len; offset += n) {
 167         bool copy;
 168
 169         /* Note that even when no rate limit is applied we need to yield
 170          * with no pending I/O here so that bdrv_drain_all() returns.
 171          */
 172         job_sleep_ns(&s->common.job, delay_ns);
 173         if (job_is_cancelled(&s->common.job)) {
 174             break;
 175         }
 176         /* Copy if allocated above the base */
 177         ret = bdrv_is_allocated_above(blk_bs(s->top), blk_bs(s->base), false,
 178                                       offset, COMMIT_BUFFER_SIZE, &n);
 179         copy = (ret == 1);
 180         trace_commit_one_iteration(s, offset, n, ret);
 181         if (copy) {
 182             ret = commit_populate(s->top, s->base, offset, n, buf);
 183             bytes_written += n;
 184         }
 185         if (ret < 0) {
 186             BlockErrorAction action =
 187                 block_job_error_action(&s->common, false, s->on_error, -ret);
 188             if (action == BLOCK_ERROR_ACTION_REPORT) {
 189                 goto out;
 190             } else {
 191                 n = 0;
 192                 continue;
 193             }
 194         }
 195         /* Publish progress */
 196         job_progress_update(&s->common.job, n);
 197
 198         if (copy) {
 199             delay_ns = block_job_ratelimit_get_delay(&s->common, n);
 200         } else {
 201             delay_ns = 0;
 202         }
 203     }
 204
 205     ret = 0;
 206
 207 out:
 208     qemu_vfree(buf);
 209
 210     return ret;
 211 }
 212
 213 static const BlockJobDriver commit_job_driver = {
 214     .job_driver = {
 215         .instance_size = sizeof(CommitBlockJob),
 216         .job_type      = JOB_TYPE_COMMIT,
 217         .free          = block_job_free,
 218         .user_resume   = block_job_user_resume,
 219         .drain         = block_job_drain,
 220         .run           = commit_run,
 221         .prepare       = commit_prepare,
 222         .abort         = commit_abort,
 223         .clean         = commit_clean
 224     },
 225 };
 226
 227 static int coroutine_fn bdrv_commit_top_preadv(BlockDriverState *bs,
 228     uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags)
 229 {
 230     return bdrv_co_preadv(bs->backing, offset, bytes, qiov, flags);
 231 }
 232
 233 static void bdrv_commit_top_refresh_filename(BlockDriverState *bs)
 234 {
 235     pstrcpy(bs->exact_filename, sizeof(bs->exact_filename),
 236             bs->backing->bs->filename);
 237 }
 238
 239 static void bdrv_commit_top_child_perm(BlockDriverState *bs, BdrvChild *c,
 240                                        const BdrvChildRole *role,
 241                                        BlockReopenQueue *reopen_queue,
 242                                        uint64_t perm, uint64_t shared,
 243                                        uint64_t *nperm, uint64_t *nshared)
 244 {
 245     *nperm = 0;
 246     *nshared = BLK_PERM_ALL;
 247 }
 248
 249 /* Dummy node that provides consistent read to its users without requiring it
 250  * from its backing file and that allows writes on the backing file chain. */
 251 static BlockDriver bdrv_commit_top = {
 252     .format_name                = "commit_top",
 253     .bdrv_co_preadv             = bdrv_commit_top_preadv,
 254     .bdrv_co_block_status       = bdrv_co_block_status_from_backing,
 255     .bdrv_refresh_filename      = bdrv_commit_top_refresh_filename,
 256     .bdrv_child_perm            = bdrv_commit_top_child_perm,
 257 };
 258
 259 void commit_start(const char *job_id, BlockDriverState *bs,
 260                   BlockDriverState *base, BlockDriverState *top,
 261                   int creation_flags, int64_t speed,
 262                   BlockdevOnError on_error, const char *backing_file_str,
 263                   const char *filter_node_name, Error **errp)
 264 {
 265     CommitBlockJob *s;
 266     BlockDriverState *iter;
 267     BlockDriverState *commit_top_bs = NULL;
 268     Error *local_err = NULL;
 269     int ret;
 270
 271     assert(top != bs);
 272     if (top == base) {
 273         error_setg(errp, "Invalid files for merge: top and base are the same");
 274         return;
 275     }
 276
 277     s = block_job_create(job_id, &commit_job_driver, NULL, bs, 0, BLK_PERM_ALL,
 278                          speed, creation_flags, NULL, NULL, errp);
 279     if (!s) {
 280         return;
 281     }
 282
 283     /* convert base to r/w, if necessary */
 284     s->base_read_only = bdrv_is_read_only(base);
 285     if (s->base_read_only) {
 286         if (bdrv_reopen_set_read_only(base, false, errp) != 0) {
 287             goto fail;
 288         }
 289     }
 290
 291     /* Insert commit_top block node above top, so we can block consistent read
 292      * on the backing chain below it */
 293     commit_top_bs = bdrv_new_open_driver(&bdrv_commit_top, filter_node_name, 0,
 294                                          errp);
 295     if (commit_top_bs == NULL) {
 296         goto fail;
 297     }
 298     if (!filter_node_name) {
 299         commit_top_bs->implicit = true;
 300     }
 301     commit_top_bs->total_sectors = top->total_sectors;
 302
 303     bdrv_append(commit_top_bs, top, &local_err);
 304     if (local_err) {
 305         commit_top_bs = NULL;
 306         error_propagate(errp, local_err);
 307         goto fail;
 308     }
 309
 310     s->commit_top_bs = commit_top_bs;
 311
 312     /* Block all nodes between top and base, because they will
 313      * disappear from the chain after this operation. */
 314     assert(bdrv_chain_contains(top, base));
 315     for (iter = top; iter != base; iter = backing_bs(iter)) {
 316         /* XXX BLK_PERM_WRITE needs to be allowed so we don't block ourselves
 317          * at s->base (if writes are blocked for a node, they are also blocked
 318          * for its backing file). The other options would be a second filter
 319          * driver above s->base. */
 320         ret = block_job_add_bdrv(&s->common, "intermediate node", iter, 0,
 321                                  BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE,
 322                                  errp);
 323         if (ret < 0) {
 324             goto fail;
 325         }
 326     }
 327
 328     if (bdrv_freeze_backing_chain(commit_top_bs, base, errp) < 0) {
 329         goto fail;
 330     }
 331     s->chain_frozen = true;
 332
 333     ret = block_job_add_bdrv(&s->common, "base", base, 0, BLK_PERM_ALL, errp);
 334     if (ret < 0) {
 335         goto fail;
 336     }
 337
 338     s->base = blk_new(s->common.job.aio_context,
 339                       BLK_PERM_CONSISTENT_READ
 340                       | BLK_PERM_WRITE
 341                       | BLK_PERM_RESIZE,
 342                       BLK_PERM_CONSISTENT_READ
 343                       | BLK_PERM_GRAPH_MOD
 344                       | BLK_PERM_WRITE_UNCHANGED);
 345     ret = blk_insert_bs(s->base, base, errp);
 346     if (ret < 0) {
 347         goto fail;
 348     }
 349     s->base_bs = base;
 350
 351     /* Required permissions are already taken with block_job_add_bdrv() */
 352     s->top = blk_new(s->common.job.aio_context, 0, BLK_PERM_ALL);
 353     ret = blk_insert_bs(s->top, top, errp);
 354     if (ret < 0) {
 355         goto fail;
 356     }
 357
 358     s->backing_file_str = g_strdup(backing_file_str);
 359     s->on_error = on_error;
 360
 361     trace_commit_start(bs, base, top, s);
 362     job_start(&s->common.job);
 363     return;
 364
 365 fail:
 366     if (s->chain_frozen) {
 367         bdrv_unfreeze_backing_chain(commit_top_bs, base);
 368     }
 369     if (s->base) {
 370         blk_unref(s->base);
 371     }
 372     if (s->top) {
 373         blk_unref(s->top);
 374     }
 375     if (s->base_read_only) {
 376         bdrv_reopen_set_read_only(base, true, NULL);
 377     }
 378     job_early_fail(&s->common.job);
 379     /* commit_top_bs has to be replaced after deleting the block job,
 380      * otherwise this would fail because of lack of permissions. */
 381     if (commit_top_bs) {
 382         bdrv_replace_node(commit_top_bs, top, &error_abort);
 383     }
 384 }
 385
 386
 387 #define COMMIT_BUF_SIZE (2048 * BDRV_SECTOR_SIZE)
 388
 389 /* commit COW file into the raw image */
 390 int bdrv_commit(BlockDriverState *bs)
 391 {
 392     BlockBackend *src, *backing;
 393     BlockDriverState *backing_file_bs = NULL;
 394     BlockDriverState *commit_top_bs = NULL;
 395     BlockDriver *drv = bs->drv;
 396     AioContext *ctx;
 397     int64_t offset, length, backing_length;
 398     int ro;
 399     int64_t n;
 400     int ret = 0;
 401     uint8_t *buf = NULL;
 402     Error *local_err = NULL;
 403
 404     if (!drv)
 405         return -ENOMEDIUM;
 406
 407     if (!bs->backing) {
 408         return -ENOTSUP;
 409     }
 410
 411     if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, NULL) ||
 412         bdrv_op_is_blocked(bs->backing->bs, BLOCK_OP_TYPE_COMMIT_TARGET, NULL)) {
 413         return -EBUSY;
 414     }
 415
 416     ro = bs->backing->bs->read_only;
 417
 418     if (ro) {
 419         if (bdrv_reopen_set_read_only(bs->backing->bs, false, NULL)) {
 420             return -EACCES;
 421         }
 422     }
 423
 424     ctx = bdrv_get_aio_context(bs);
 425     src = blk_new(ctx, BLK_PERM_CONSISTENT_READ, BLK_PERM_ALL);
 426     backing = blk_new(ctx, BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL);
 427
 428     ret = blk_insert_bs(src, bs, &local_err);
 429     if (ret < 0) {
 430         error_report_err(local_err);
 431         goto ro_cleanup;
 432     }
 433
 434     /* Insert commit_top block node above backing, so we can write to it */
 435     backing_file_bs = backing_bs(bs);
 436
 437     commit_top_bs = bdrv_new_open_driver(&bdrv_commit_top, NULL, BDRV_O_RDWR,
 438                                          &local_err);
 439     if (commit_top_bs == NULL) {
 440         error_report_err(local_err);
 441         goto ro_cleanup;
 442     }
 443
 444     bdrv_set_backing_hd(commit_top_bs, backing_file_bs, &error_abort);
 445     bdrv_set_backing_hd(bs, commit_top_bs, &error_abort);
 446
 447     ret = blk_insert_bs(backing, backing_file_bs, &local_err);
 448     if (ret < 0) {
 449         error_report_err(local_err);
 450         goto ro_cleanup;
 451     }
 452
 453     length = blk_getlength(src);
 454     if (length < 0) {
 455         ret = length;
 456         goto ro_cleanup;
 457     }
 458
 459     backing_length = blk_getlength(backing);
 460     if (backing_length < 0) {
 461         ret = backing_length;
 462         goto ro_cleanup;
 463     }
 464
 465     /* If our top snapshot is larger than the backing file image,
 466      * grow the backing file image if possible.  If not possible,
 467      * we must return an error */
 468     if (length > backing_length) {
 469         ret = blk_truncate(backing, length, PREALLOC_MODE_OFF, &local_err);
 470         if (ret < 0) {
 471             error_report_err(local_err);
 472             goto ro_cleanup;
 473         }
 474     }
 475
 476     /* blk_try_blockalign() for src will choose an alignment that works for
 477      * backing as well, so no need to compare the alignment manually. */
 478     buf = blk_try_blockalign(src, COMMIT_BUF_SIZE);
 479     if (buf == NULL) {
 480         ret = -ENOMEM;
 481         goto ro_cleanup;
 482     }
 483
 484     for (offset = 0; offset < length; offset += n) {
 485         ret = bdrv_is_allocated(bs, offset, COMMIT_BUF_SIZE, &n);
 486         if (ret < 0) {
 487             goto ro_cleanup;
 488         }
 489         if (ret) {
 490             ret = blk_pread(src, offset, buf, n);
 491             if (ret < 0) {
 492                 goto ro_cleanup;
 493             }
 494
 495             ret = blk_pwrite(backing, offset, buf, n, 0);
 496             if (ret < 0) {
 497                 goto ro_cleanup;
 498             }
 499         }
 500     }
 501
 502     if (drv->bdrv_make_empty) {
 503         ret = drv->bdrv_make_empty(bs);
 504         if (ret < 0) {
 505             goto ro_cleanup;
 506         }
 507         blk_flush(src);
 508     }
 509
 510     /*
 511      * Make sure all data we wrote to the backing device is actually
 512      * stable on disk.
 513      */
 514     blk_flush(backing);
 515
 516     ret = 0;
 517 ro_cleanup:
 518     qemu_vfree(buf);
 519
 520     blk_unref(backing);
 521     if (backing_file_bs) {
 522         bdrv_set_backing_hd(bs, backing_file_bs, &error_abort);
 523     }
 524     bdrv_unref(commit_top_bs);
 525     blk_unref(src);
 526
 527     if (ro) {
 528         /* ignoring error return here */
 529         bdrv_reopen_set_read_only(bs->backing->bs, true, NULL);
 530     }
 531
 532     return ret;
 533 }