usr/src/uts/common/fs/zfs/dsl_destroy.c

   1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
  24  * Copyright (c) 2013 Steven Hartland. All rights reserved.
  25  * Copyright (c) 2013 by Joyent, Inc. All rights reserved.
  26  * Copyright (c) 2014 Integros [integros.com]
  27  */
  28
  29 #include <sys/zfs_context.h>
  30 #include <sys/dsl_userhold.h>
  31 #include <sys/dsl_dataset.h>
  32 #include <sys/dsl_synctask.h>
  33 #include <sys/dsl_destroy.h>
  34 #include <sys/dmu_tx.h>
  35 #include <sys/dsl_pool.h>
  36 #include <sys/dsl_dir.h>
  37 #include <sys/dmu_traverse.h>
  38 #include <sys/dsl_scan.h>
  39 #include <sys/dmu_objset.h>
  40 #include <sys/zap.h>
  41 #include <sys/zfeature.h>
  42 #include <sys/zfs_ioctl.h>
  43 #include <sys/dsl_deleg.h>
  44 #include <sys/dmu_impl.h>
  45 #include <sys/zcp.h>
  46
  47 int
  48 dsl_destroy_snapshot_check_impl(dsl_dataset_t *ds, boolean_t defer)
  49 {
  50         if (!ds->ds_is_snapshot)
  51                 return (SET_ERROR(EINVAL));
  52
  53         if (dsl_dataset_long_held(ds))
  54                 return (SET_ERROR(EBUSY));
  55
  56         /*
  57          * Only allow deferred destroy on pools that support it.
  58          * NOTE: deferred destroy is only supported on snapshots.
  59          */
  60         if (defer) {
  61                 if (spa_version(ds->ds_dir->dd_pool->dp_spa) <
  62                     SPA_VERSION_USERREFS)
  63                         return (SET_ERROR(ENOTSUP));
  64                 return (0);
  65         }
  66
  67         /*
  68          * If this snapshot has an elevated user reference count,
  69          * we can't destroy it yet.
  70          */
  71         if (ds->ds_userrefs > 0)
  72                 return (SET_ERROR(EBUSY));
  73
  74         /*
  75          * Can't delete a branch point.
  76          */
  77         if (dsl_dataset_phys(ds)->ds_num_children > 1)
  78                 return (SET_ERROR(EEXIST));
  79
  80         return (0);
  81 }
  82
  83 int
  84 dsl_destroy_snapshot_check(void *arg, dmu_tx_t *tx)
  85 {
  86         dsl_destroy_snapshot_arg_t *ddsa = arg;
  87         const char *dsname = ddsa->ddsa_name;
  88         boolean_t defer = ddsa->ddsa_defer;
  89
  90         dsl_pool_t *dp = dmu_tx_pool(tx);
  91         int error = 0;
  92         dsl_dataset_t *ds;
  93
  94         error = dsl_dataset_hold(dp, dsname, FTAG, &ds);
  95
  96         /*
  97          * If the snapshot does not exist, silently ignore it, and
  98          * dsl_destroy_snapshot_sync() will be a no-op
  99          * (it's "already destroyed").
 100          */
 101         if (error == ENOENT)
 102                 return (0);
 103
 104         if (error == 0) {
 105                 error = dsl_destroy_snapshot_check_impl(ds, defer);
 106                 dsl_dataset_rele(ds, FTAG);
 107         }
 108
 109         return (error);
 110 }
 111
 112 struct process_old_arg {
 113         dsl_dataset_t *ds;
 114         dsl_dataset_t *ds_prev;
 115         boolean_t after_branch_point;
 116         zio_t *pio;
 117         uint64_t used, comp, uncomp;
 118 };
 119
 120 static int
 121 process_old_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
 122 {
 123         struct process_old_arg *poa = arg;
 124         dsl_pool_t *dp = poa->ds->ds_dir->dd_pool;
 125
 126         ASSERT(!BP_IS_HOLE(bp));
 127
 128         if (bp->blk_birth <= dsl_dataset_phys(poa->ds)->ds_prev_snap_txg) {
 129                 dsl_deadlist_insert(&poa->ds->ds_deadlist, bp, tx);
 130                 if (poa->ds_prev && !poa->after_branch_point &&
 131                     bp->blk_birth >
 132                     dsl_dataset_phys(poa->ds_prev)->ds_prev_snap_txg) {
 133                         dsl_dataset_phys(poa->ds_prev)->ds_unique_bytes +=
 134                             bp_get_dsize_sync(dp->dp_spa, bp);
 135                 }
 136         } else {
 137                 poa->used += bp_get_dsize_sync(dp->dp_spa, bp);
 138                 poa->comp += BP_GET_PSIZE(bp);
 139                 poa->uncomp += BP_GET_UCSIZE(bp);
 140                 dsl_free_sync(poa->pio, dp, tx->tx_txg, bp);
 141         }
 142         return (0);
 143 }
 144
 145 static void
 146 process_old_deadlist(dsl_dataset_t *ds, dsl_dataset_t *ds_prev,
 147     dsl_dataset_t *ds_next, boolean_t after_branch_point, dmu_tx_t *tx)
 148 {
 149         struct process_old_arg poa = { 0 };
 150         dsl_pool_t *dp = ds->ds_dir->dd_pool;
 151         objset_t *mos = dp->dp_meta_objset;
 152         uint64_t deadlist_obj;
 153
 154         ASSERT(ds->ds_deadlist.dl_oldfmt);
 155         ASSERT(ds_next->ds_deadlist.dl_oldfmt);
 156
 157         poa.ds = ds;
 158         poa.ds_prev = ds_prev;
 159         poa.after_branch_point = after_branch_point;
 160         poa.pio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
 161         VERIFY0(bpobj_iterate(&ds_next->ds_deadlist.dl_bpobj,
 162             process_old_cb, &poa, tx));
 163         VERIFY0(zio_wait(poa.pio));
 164         ASSERT3U(poa.used, ==, dsl_dataset_phys(ds)->ds_unique_bytes);
 165
 166         /* change snapused */
 167         dsl_dir_diduse_space(ds->ds_dir, DD_USED_SNAP,
 168             -poa.used, -poa.comp, -poa.uncomp, tx);
 169
 170         /* swap next's deadlist to our deadlist */
 171         dsl_deadlist_close(&ds->ds_deadlist);
 172         dsl_deadlist_close(&ds_next->ds_deadlist);
 173         deadlist_obj = dsl_dataset_phys(ds)->ds_deadlist_obj;
 174         dsl_dataset_phys(ds)->ds_deadlist_obj =
 175             dsl_dataset_phys(ds_next)->ds_deadlist_obj;
 176         dsl_dataset_phys(ds_next)->ds_deadlist_obj = deadlist_obj;
 177         dsl_deadlist_open(&ds->ds_deadlist, mos,
 178             dsl_dataset_phys(ds)->ds_deadlist_obj);
 179         dsl_deadlist_open(&ds_next->ds_deadlist, mos,
 180             dsl_dataset_phys(ds_next)->ds_deadlist_obj);
 181 }
 182
 183 static void
 184 dsl_dataset_remove_clones_key(dsl_dataset_t *ds, uint64_t mintxg, dmu_tx_t *tx)
 185 {
 186         objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
 187         zap_cursor_t zc;
 188         zap_attribute_t za;
 189
 190         /*
 191          * If it is the old version, dd_clones doesn't exist so we can't
 192          * find the clones, but dsl_deadlist_remove_key() is a no-op so it
 193          * doesn't matter.
 194          */
 195         if (dsl_dir_phys(ds->ds_dir)->dd_clones == 0)
 196                 return;
 197
 198         for (zap_cursor_init(&zc, mos, dsl_dir_phys(ds->ds_dir)->dd_clones);
 199             zap_cursor_retrieve(&zc, &za) == 0;
 200             zap_cursor_advance(&zc)) {
 201                 dsl_dataset_t *clone;
 202
 203                 VERIFY0(dsl_dataset_hold_obj(ds->ds_dir->dd_pool,
 204                     za.za_first_integer, FTAG, &clone));
 205                 if (clone->ds_dir->dd_origin_txg > mintxg) {
 206                         dsl_deadlist_remove_key(&clone->ds_deadlist,
 207                             mintxg, tx);
 208                         dsl_dataset_remove_clones_key(clone, mintxg, tx);
 209                 }
 210                 dsl_dataset_rele(clone, FTAG);
 211         }
 212         zap_cursor_fini(&zc);
 213 }
 214
 215 void
 216 dsl_destroy_snapshot_sync_impl(dsl_dataset_t *ds, boolean_t defer, dmu_tx_t *tx)
 217 {
 218         int err;
 219         int after_branch_point = FALSE;
 220         dsl_pool_t *dp = ds->ds_dir->dd_pool;
 221         objset_t *mos = dp->dp_meta_objset;
 222         dsl_dataset_t *ds_prev = NULL;
 223         uint64_t obj;
 224
 225         ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock));
 226         rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
 227         ASSERT3U(dsl_dataset_phys(ds)->ds_bp.blk_birth, <=, tx->tx_txg);
 228         rrw_exit(&ds->ds_bp_rwlock, FTAG);
 229         ASSERT(refcount_is_zero(&ds->ds_longholds));
 230
 231         if (defer &&
 232             (ds->ds_userrefs > 0 ||
 233             dsl_dataset_phys(ds)->ds_num_children > 1)) {
 234                 ASSERT(spa_version(dp->dp_spa) >= SPA_VERSION_USERREFS);
 235                 dmu_buf_will_dirty(ds->ds_dbuf, tx);
 236                 dsl_dataset_phys(ds)->ds_flags |= DS_FLAG_DEFER_DESTROY;
 237                 spa_history_log_internal_ds(ds, "defer_destroy", tx, "");
 238                 return;
 239         }
 240
 241         ASSERT3U(dsl_dataset_phys(ds)->ds_num_children, <=, 1);
 242
 243         /* We need to log before removing it from the namespace. */
 244         spa_history_log_internal_ds(ds, "destroy", tx, "");
 245
 246         dsl_scan_ds_destroyed(ds, tx);
 247
 248         obj = ds->ds_object;
 249
 250         for (spa_feature_t f = 0; f < SPA_FEATURES; f++) {
 251                 if (ds->ds_feature_inuse[f]) {
 252                         dsl_dataset_deactivate_feature(obj, f, tx);
 253                         ds->ds_feature_inuse[f] = B_FALSE;
 254                 }
 255         }
 256         if (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0) {
 257                 ASSERT3P(ds->ds_prev, ==, NULL);
 258                 VERIFY0(dsl_dataset_hold_obj(dp,
 259                     dsl_dataset_phys(ds)->ds_prev_snap_obj, FTAG, &ds_prev));
 260                 after_branch_point =
 261                     (dsl_dataset_phys(ds_prev)->ds_next_snap_obj != obj);
 262
 263                 dmu_buf_will_dirty(ds_prev->ds_dbuf, tx);
 264                 if (after_branch_point &&
 265                     dsl_dataset_phys(ds_prev)->ds_next_clones_obj != 0) {
 266                         dsl_dataset_remove_from_next_clones(ds_prev, obj, tx);
 267                         if (dsl_dataset_phys(ds)->ds_next_snap_obj != 0) {
 268                                 VERIFY0(zap_add_int(mos,
 269                                     dsl_dataset_phys(ds_prev)->
 270                                     ds_next_clones_obj,
 271                                     dsl_dataset_phys(ds)->ds_next_snap_obj,
 272                                     tx));
 273                         }
 274                 }
 275                 if (!after_branch_point) {
 276                         dsl_dataset_phys(ds_prev)->ds_next_snap_obj =
 277                             dsl_dataset_phys(ds)->ds_next_snap_obj;
 278                 }
 279         }
 280
 281         dsl_dataset_t *ds_next;
 282         uint64_t old_unique;
 283         uint64_t used = 0, comp = 0, uncomp = 0;
 284
 285         VERIFY0(dsl_dataset_hold_obj(dp,
 286             dsl_dataset_phys(ds)->ds_next_snap_obj, FTAG, &ds_next));
 287         ASSERT3U(dsl_dataset_phys(ds_next)->ds_prev_snap_obj, ==, obj);
 288
 289         old_unique = dsl_dataset_phys(ds_next)->ds_unique_bytes;
 290
 291         dmu_buf_will_dirty(ds_next->ds_dbuf, tx);
 292         dsl_dataset_phys(ds_next)->ds_prev_snap_obj =
 293             dsl_dataset_phys(ds)->ds_prev_snap_obj;
 294         dsl_dataset_phys(ds_next)->ds_prev_snap_txg =
 295             dsl_dataset_phys(ds)->ds_prev_snap_txg;
 296         ASSERT3U(dsl_dataset_phys(ds)->ds_prev_snap_txg, ==,
 297             ds_prev ? dsl_dataset_phys(ds_prev)->ds_creation_txg : 0);
 298
 299         if (ds_next->ds_deadlist.dl_oldfmt) {
 300                 process_old_deadlist(ds, ds_prev, ds_next,
 301                     after_branch_point, tx);
 302         } else {
 303                 /* Adjust prev's unique space. */
 304                 if (ds_prev && !after_branch_point) {
 305                         dsl_deadlist_space_range(&ds_next->ds_deadlist,
 306                             dsl_dataset_phys(ds_prev)->ds_prev_snap_txg,
 307                             dsl_dataset_phys(ds)->ds_prev_snap_txg,
 308                             &used, &comp, &uncomp);
 309                         dsl_dataset_phys(ds_prev)->ds_unique_bytes += used;
 310                 }
 311
 312                 /* Adjust snapused. */
 313                 dsl_deadlist_space_range(&ds_next->ds_deadlist,
 314                     dsl_dataset_phys(ds)->ds_prev_snap_txg, UINT64_MAX,
 315                     &used, &comp, &uncomp);
 316                 dsl_dir_diduse_space(ds->ds_dir, DD_USED_SNAP,
 317                     -used, -comp, -uncomp, tx);
 318
 319                 /* Move blocks to be freed to pool's free list. */
 320                 dsl_deadlist_move_bpobj(&ds_next->ds_deadlist,
 321                     &dp->dp_free_bpobj, dsl_dataset_phys(ds)->ds_prev_snap_txg,
 322                     tx);
 323                 dsl_dir_diduse_space(tx->tx_pool->dp_free_dir,
 324                     DD_USED_HEAD, used, comp, uncomp, tx);
 325
 326                 /* Merge our deadlist into next's and free it. */
 327                 dsl_deadlist_merge(&ds_next->ds_deadlist,
 328                     dsl_dataset_phys(ds)->ds_deadlist_obj, tx);
 329         }
 330         dsl_deadlist_close(&ds->ds_deadlist);
 331         dsl_deadlist_free(mos, dsl_dataset_phys(ds)->ds_deadlist_obj, tx);
 332         dmu_buf_will_dirty(ds->ds_dbuf, tx);
 333         dsl_dataset_phys(ds)->ds_deadlist_obj = 0;
 334
 335         /* Collapse range in clone heads */
 336         dsl_dataset_remove_clones_key(ds,
 337             dsl_dataset_phys(ds)->ds_creation_txg, tx);
 338
 339         if (ds_next->ds_is_snapshot) {
 340                 dsl_dataset_t *ds_nextnext;
 341
 342                 /*
 343                  * Update next's unique to include blocks which
 344                  * were previously shared by only this snapshot
 345                  * and it.  Those blocks will be born after the
 346                  * prev snap and before this snap, and will have
 347                  * died after the next snap and before the one
 348                  * after that (ie. be on the snap after next's
 349                  * deadlist).
 350                  */
 351                 VERIFY0(dsl_dataset_hold_obj(dp,
 352                     dsl_dataset_phys(ds_next)->ds_next_snap_obj,
 353                     FTAG, &ds_nextnext));
 354                 dsl_deadlist_space_range(&ds_nextnext->ds_deadlist,
 355                     dsl_dataset_phys(ds)->ds_prev_snap_txg,
 356                     dsl_dataset_phys(ds)->ds_creation_txg,
 357                     &used, &comp, &uncomp);
 358                 dsl_dataset_phys(ds_next)->ds_unique_bytes += used;
 359                 dsl_dataset_rele(ds_nextnext, FTAG);
 360                 ASSERT3P(ds_next->ds_prev, ==, NULL);
 361
 362                 /* Collapse range in this head. */
 363                 dsl_dataset_t *hds;
 364                 VERIFY0(dsl_dataset_hold_obj(dp,
 365                     dsl_dir_phys(ds->ds_dir)->dd_head_dataset_obj, FTAG, &hds));
 366                 dsl_deadlist_remove_key(&hds->ds_deadlist,
 367                     dsl_dataset_phys(ds)->ds_creation_txg, tx);
 368                 dsl_dataset_rele(hds, FTAG);
 369
 370         } else {
 371                 ASSERT3P(ds_next->ds_prev, ==, ds);
 372                 dsl_dataset_rele(ds_next->ds_prev, ds_next);
 373                 ds_next->ds_prev = NULL;
 374                 if (ds_prev) {
 375                         VERIFY0(dsl_dataset_hold_obj(dp,
 376                             dsl_dataset_phys(ds)->ds_prev_snap_obj,
 377                             ds_next, &ds_next->ds_prev));
 378                 }
 379
 380                 dsl_dataset_recalc_head_uniq(ds_next);
 381
 382                 /*
 383                  * Reduce the amount of our unconsumed refreservation
 384                  * being charged to our parent by the amount of
 385                  * new unique data we have gained.
 386                  */
 387                 if (old_unique < ds_next->ds_reserved) {
 388                         int64_t mrsdelta;
 389                         uint64_t new_unique =
 390                             dsl_dataset_phys(ds_next)->ds_unique_bytes;
 391
 392                         ASSERT(old_unique <= new_unique);
 393                         mrsdelta = MIN(new_unique - old_unique,
 394                             ds_next->ds_reserved - old_unique);
 395                         dsl_dir_diduse_space(ds->ds_dir,
 396                             DD_USED_REFRSRV, -mrsdelta, 0, 0, tx);
 397                 }
 398         }
 399         dsl_dataset_rele(ds_next, FTAG);
 400
 401         /*
 402          * This must be done after the dsl_traverse(), because it will
 403          * re-open the objset.
 404          */
 405         if (ds->ds_objset) {
 406                 dmu_objset_evict(ds->ds_objset);
 407                 ds->ds_objset = NULL;
 408         }
 409
 410         /* remove from snapshot namespace */
 411         dsl_dataset_t *ds_head;
 412         ASSERT(dsl_dataset_phys(ds)->ds_snapnames_zapobj == 0);
 413         VERIFY0(dsl_dataset_hold_obj(dp,
 414             dsl_dir_phys(ds->ds_dir)->dd_head_dataset_obj, FTAG, &ds_head));
 415         VERIFY0(dsl_dataset_get_snapname(ds));
 416 #ifdef ZFS_DEBUG
 417         {
 418                 uint64_t val;
 419
 420                 err = dsl_dataset_snap_lookup(ds_head,
 421                     ds->ds_snapname, &val);
 422                 ASSERT0(err);
 423                 ASSERT3U(val, ==, obj);
 424         }
 425 #endif
 426         VERIFY0(dsl_dataset_snap_remove(ds_head, ds->ds_snapname, tx, B_TRUE));
 427         dsl_dataset_rele(ds_head, FTAG);
 428
 429         if (ds_prev != NULL)
 430                 dsl_dataset_rele(ds_prev, FTAG);
 431
 432         spa_prop_clear_bootfs(dp->dp_spa, ds->ds_object, tx);
 433
 434         if (dsl_dataset_phys(ds)->ds_next_clones_obj != 0) {
 435                 uint64_t count;
 436                 ASSERT0(zap_count(mos,
 437                     dsl_dataset_phys(ds)->ds_next_clones_obj, &count) &&
 438                     count == 0);
 439                 VERIFY0(dmu_object_free(mos,
 440                     dsl_dataset_phys(ds)->ds_next_clones_obj, tx));
 441         }
 442         if (dsl_dataset_phys(ds)->ds_props_obj != 0)
 443                 VERIFY0(zap_destroy(mos, dsl_dataset_phys(ds)->ds_props_obj,
 444                     tx));
 445         if (dsl_dataset_phys(ds)->ds_userrefs_obj != 0)
 446                 VERIFY0(zap_destroy(mos, dsl_dataset_phys(ds)->ds_userrefs_obj,
 447                     tx));
 448         dsl_dir_rele(ds->ds_dir, ds);
 449         ds->ds_dir = NULL;
 450         dmu_object_free_zapified(mos, obj, tx);
 451 }
 452
 453 void
 454 dsl_destroy_snapshot_sync(void *arg, dmu_tx_t *tx)
 455 {
 456         dsl_destroy_snapshot_arg_t *ddsa = arg;
 457         const char *dsname = ddsa->ddsa_name;
 458         boolean_t defer = ddsa->ddsa_defer;
 459
 460         dsl_pool_t *dp = dmu_tx_pool(tx);
 461         dsl_dataset_t *ds;
 462
 463         int error = dsl_dataset_hold(dp, dsname, FTAG, &ds);
 464         if (error == ENOENT)
 465                 return;
 466         ASSERT0(error);
 467         dsl_destroy_snapshot_sync_impl(ds, defer, tx);
 468         dsl_dataset_rele(ds, FTAG);
 469 }
 470
 471 /*
 472  * The semantics of this function are described in the comment above
 473  * lzc_destroy_snaps().  To summarize:
 474  *
 475  * The snapshots must all be in the same pool.
 476  *
 477  * Snapshots that don't exist will be silently ignored (considered to be
 478  * "already deleted").
 479  *
 480  * On success, all snaps will be destroyed and this will return 0.
 481  * On failure, no snaps will be destroyed, the errlist will be filled in,
 482  * and this will return an errno.
 483  */
 484 int
 485 dsl_destroy_snapshots_nvl(nvlist_t *snaps, boolean_t defer,
 486     nvlist_t *errlist)
 487 {
 488         if (nvlist_next_nvpair(snaps, NULL) == NULL)
 489                 return (0);
 490
 491         /*
 492          * lzc_destroy_snaps() is documented to take an nvlist whose
 493          * values "don't matter".  We need to convert that nvlist to
 494          * one that we know can be converted to LUA. We also don't
 495          * care about any duplicate entries because the nvlist will
 496          * be converted to a LUA table which should take care of this.
 497          */
 498         nvlist_t *snaps_normalized;
 499         VERIFY0(nvlist_alloc(&snaps_normalized, 0, KM_SLEEP));
 500         for (nvpair_t *pair = nvlist_next_nvpair(snaps, NULL);
 501             pair != NULL; pair = nvlist_next_nvpair(snaps, pair)) {
 502                 fnvlist_add_boolean_value(snaps_normalized,
 503                     nvpair_name(pair), B_TRUE);
 504         }
 505
 506         nvlist_t *arg;
 507         VERIFY0(nvlist_alloc(&arg, 0, KM_SLEEP));
 508         fnvlist_add_nvlist(arg, "snaps", snaps_normalized);
 509         fnvlist_free(snaps_normalized);
 510         fnvlist_add_boolean_value(arg, "defer", defer);
 511
 512         nvlist_t *wrapper;
 513         VERIFY0(nvlist_alloc(&wrapper, 0, KM_SLEEP));
 514         fnvlist_add_nvlist(wrapper, ZCP_ARG_ARGLIST, arg);
 515         fnvlist_free(arg);
 516
 517         const char *program =
 518             "arg = ...\n"
 519             "snaps = arg['snaps']\n"
 520             "defer = arg['defer']\n"
 521             "errors = { }\n"
 522             "has_errors = false\n"
 523             "for snap, v in pairs(snaps) do\n"
 524             "    errno = zfs.check.destroy{snap, defer=defer}\n"
 525             "    zfs.debug('snap: ' .. snap .. ' errno: ' .. errno)\n"
 526             "    if errno == ENOENT then\n"
 527             "        snaps[snap] = nil\n"
 528             "    elseif errno ~= 0 then\n"
 529             "        errors[snap] = errno\n"
 530             "        has_errors = true\n"
 531             "    end\n"
 532             "end\n"
 533             "if has_errors then\n"
 534             "    return errors\n"
 535             "end\n"
 536             "for snap, v in pairs(snaps) do\n"
 537             "    errno = zfs.sync.destroy{snap, defer=defer}\n"
 538             "    assert(errno == 0)\n"
 539             "end\n"
 540             "return { }\n";
 541
 542         nvlist_t *result = fnvlist_alloc();
 543         int error = zcp_eval(nvpair_name(nvlist_next_nvpair(snaps, NULL)),
 544             program,
 545             0,
 546             zfs_lua_max_memlimit,
 547             nvlist_next_nvpair(wrapper, NULL), result);
 548         if (error != 0) {
 549                 char *errorstr = NULL;
 550                 (void) nvlist_lookup_string(result, ZCP_RET_ERROR, &errorstr);
 551                 if (errorstr != NULL) {
 552                         zfs_dbgmsg(errorstr);
 553                 }
 554                 return (error);
 555         }
 556         fnvlist_free(wrapper);
 557
 558         /*
 559          * lzc_destroy_snaps() is documented to fill the errlist with
 560          * int32 values, so we need to covert the int64 values that are
 561          * returned from LUA.
 562          */
 563         int rv = 0;
 564         nvlist_t *errlist_raw = fnvlist_lookup_nvlist(result, ZCP_RET_RETURN);
 565         for (nvpair_t *pair = nvlist_next_nvpair(errlist_raw, NULL);
 566             pair != NULL; pair = nvlist_next_nvpair(errlist_raw, pair)) {
 567                 int32_t val = (int32_t)fnvpair_value_int64(pair);
 568                 if (rv == 0)
 569                         rv = val;
 570                 fnvlist_add_int32(errlist, nvpair_name(pair), val);
 571         }
 572         fnvlist_free(result);
 573         return (rv);
 574 }
 575
 576 int
 577 dsl_destroy_snapshot(const char *name, boolean_t defer)
 578 {
 579         int error;
 580         nvlist_t *nvl = fnvlist_alloc();
 581         nvlist_t *errlist = fnvlist_alloc();
 582
 583         fnvlist_add_boolean(nvl, name);
 584         error = dsl_destroy_snapshots_nvl(nvl, defer, errlist);
 585         fnvlist_free(errlist);
 586         fnvlist_free(nvl);
 587         return (error);
 588 }
 589
 590 struct killarg {
 591         dsl_dataset_t *ds;
 592         dmu_tx_t *tx;
 593 };
 594
 595 /* ARGSUSED */
 596 static int
 597 kill_blkptr(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
 598     const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg)
 599 {
 600         struct killarg *ka = arg;
 601         dmu_tx_t *tx = ka->tx;
 602
 603         if (bp == NULL || BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp))
 604                 return (0);
 605
 606         if (zb->zb_level == ZB_ZIL_LEVEL) {
 607                 ASSERT(zilog != NULL);
 608                 /*
 609                  * It's a block in the intent log.  It has no
 610                  * accounting, so just free it.
 611                  */
 612                 dsl_free(ka->tx->tx_pool, ka->tx->tx_txg, bp);
 613         } else {
 614                 ASSERT(zilog == NULL);
 615                 ASSERT3U(bp->blk_birth, >,
 616                     dsl_dataset_phys(ka->ds)->ds_prev_snap_txg);
 617                 (void) dsl_dataset_block_kill(ka->ds, bp, tx, B_FALSE);
 618         }
 619
 620         return (0);
 621 }
 622
 623 static void
 624 old_synchronous_dataset_destroy(dsl_dataset_t *ds, dmu_tx_t *tx)
 625 {
 626         struct killarg ka;
 627
 628         /*
 629          * Free everything that we point to (that's born after
 630          * the previous snapshot, if we are a clone)
 631          *
 632          * NB: this should be very quick, because we already
 633          * freed all the objects in open context.
 634          */
 635         ka.ds = ds;
 636         ka.tx = tx;
 637         VERIFY0(traverse_dataset(ds,
 638             dsl_dataset_phys(ds)->ds_prev_snap_txg, TRAVERSE_POST,
 639             kill_blkptr, &ka));
 640         ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) ||
 641             dsl_dataset_phys(ds)->ds_unique_bytes == 0);
 642 }
 643
 644 int
 645 dsl_destroy_head_check_impl(dsl_dataset_t *ds, int expected_holds)
 646 {
 647         int error;
 648         uint64_t count;
 649         objset_t *mos;
 650
 651         ASSERT(!ds->ds_is_snapshot);
 652         if (ds->ds_is_snapshot)
 653                 return (SET_ERROR(EINVAL));
 654
 655         if (refcount_count(&ds->ds_longholds) != expected_holds)
 656                 return (SET_ERROR(EBUSY));
 657
 658         mos = ds->ds_dir->dd_pool->dp_meta_objset;
 659
 660         /*
 661          * Can't delete a head dataset if there are snapshots of it.
 662          * (Except if the only snapshots are from the branch we cloned
 663          * from.)
 664          */
 665         if (ds->ds_prev != NULL &&
 666             dsl_dataset_phys(ds->ds_prev)->ds_next_snap_obj == ds->ds_object)
 667                 return (SET_ERROR(EBUSY));
 668
 669         /*
 670          * Can't delete if there are children of this fs.
 671          */
 672         error = zap_count(mos,
 673             dsl_dir_phys(ds->ds_dir)->dd_child_dir_zapobj, &count);
 674         if (error != 0)
 675                 return (error);
 676         if (count != 0)
 677                 return (SET_ERROR(EEXIST));
 678
 679         if (dsl_dir_is_clone(ds->ds_dir) && DS_IS_DEFER_DESTROY(ds->ds_prev) &&
 680             dsl_dataset_phys(ds->ds_prev)->ds_num_children == 2 &&
 681             ds->ds_prev->ds_userrefs == 0) {
 682                 /* We need to remove the origin snapshot as well. */
 683                 if (!refcount_is_zero(&ds->ds_prev->ds_longholds))
 684                         return (SET_ERROR(EBUSY));
 685         }
 686         return (0);
 687 }
 688
 689 int
 690 dsl_destroy_head_check(void *arg, dmu_tx_t *tx)
 691 {
 692         dsl_destroy_head_arg_t *ddha = arg;
 693         dsl_pool_t *dp = dmu_tx_pool(tx);
 694         dsl_dataset_t *ds;
 695         int error;
 696
 697         error = dsl_dataset_hold(dp, ddha->ddha_name, FTAG, &ds);
 698         if (error != 0)
 699                 return (error);
 700
 701         error = dsl_destroy_head_check_impl(ds, 0);
 702         dsl_dataset_rele(ds, FTAG);
 703         return (error);
 704 }
 705
 706 static void
 707 dsl_dir_destroy_sync(uint64_t ddobj, dmu_tx_t *tx)
 708 {
 709         dsl_dir_t *dd;
 710         dsl_pool_t *dp = dmu_tx_pool(tx);
 711         objset_t *mos = dp->dp_meta_objset;
 712         dd_used_t t;
 713
 714         ASSERT(RRW_WRITE_HELD(&dmu_tx_pool(tx)->dp_config_rwlock));
 715
 716         VERIFY0(dsl_dir_hold_obj(dp, ddobj, NULL, FTAG, &dd));
 717
 718         ASSERT0(dsl_dir_phys(dd)->dd_head_dataset_obj);
 719
 720         /*
 721          * Decrement the filesystem count for all parent filesystems.
 722          *
 723          * When we receive an incremental stream into a filesystem that already
 724          * exists, a temporary clone is created.  We never count this temporary
 725          * clone, whose name begins with a '%'.
 726          */
 727         if (dd->dd_myname[0] != '%' && dd->dd_parent != NULL)
 728                 dsl_fs_ss_count_adjust(dd->dd_parent, -1,
 729                     DD_FIELD_FILESYSTEM_COUNT, tx);
 730
 731         /*
 732          * Remove our reservation. The impl() routine avoids setting the
 733          * actual property, which would require the (already destroyed) ds.
 734          */
 735         dsl_dir_set_reservation_sync_impl(dd, 0, tx);
 736
 737         ASSERT0(dsl_dir_phys(dd)->dd_used_bytes);
 738         ASSERT0(dsl_dir_phys(dd)->dd_reserved);
 739         for (t = 0; t < DD_USED_NUM; t++)
 740                 ASSERT0(dsl_dir_phys(dd)->dd_used_breakdown[t]);
 741
 742         VERIFY0(zap_destroy(mos, dsl_dir_phys(dd)->dd_child_dir_zapobj, tx));
 743         VERIFY0(zap_destroy(mos, dsl_dir_phys(dd)->dd_props_zapobj, tx));
 744         VERIFY0(dsl_deleg_destroy(mos, dsl_dir_phys(dd)->dd_deleg_zapobj, tx));
 745         VERIFY0(zap_remove(mos,
 746             dsl_dir_phys(dd->dd_parent)->dd_child_dir_zapobj,
 747             dd->dd_myname, tx));
 748
 749         dsl_dir_rele(dd, FTAG);
 750         dmu_object_free_zapified(mos, ddobj, tx);
 751 }
 752
 753 void
 754 dsl_destroy_head_sync_impl(dsl_dataset_t *ds, dmu_tx_t *tx)
 755 {
 756         dsl_pool_t *dp = dmu_tx_pool(tx);
 757         objset_t *mos = dp->dp_meta_objset;
 758         uint64_t obj, ddobj, prevobj = 0;
 759         boolean_t rmorigin;
 760
 761         ASSERT3U(dsl_dataset_phys(ds)->ds_num_children, <=, 1);
 762         ASSERT(ds->ds_prev == NULL ||
 763             dsl_dataset_phys(ds->ds_prev)->ds_next_snap_obj != ds->ds_object);
 764         rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
 765         ASSERT3U(dsl_dataset_phys(ds)->ds_bp.blk_birth, <=, tx->tx_txg);
 766         rrw_exit(&ds->ds_bp_rwlock, FTAG);
 767         ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock));
 768
 769         /* We need to log before removing it from the namespace. */
 770         spa_history_log_internal_ds(ds, "destroy", tx, "");
 771
 772         rmorigin = (dsl_dir_is_clone(ds->ds_dir) &&
 773             DS_IS_DEFER_DESTROY(ds->ds_prev) &&
 774             dsl_dataset_phys(ds->ds_prev)->ds_num_children == 2 &&
 775             ds->ds_prev->ds_userrefs == 0);
 776
 777         /* Remove our reservation. */
 778         if (ds->ds_reserved != 0) {
 779                 dsl_dataset_set_refreservation_sync_impl(ds,
 780                     (ZPROP_SRC_NONE | ZPROP_SRC_LOCAL | ZPROP_SRC_RECEIVED),
 781                     0, tx);
 782                 ASSERT0(ds->ds_reserved);
 783         }
 784
 785         obj = ds->ds_object;
 786
 787         for (spa_feature_t f = 0; f < SPA_FEATURES; f++) {
 788                 if (ds->ds_feature_inuse[f]) {
 789                         dsl_dataset_deactivate_feature(obj, f, tx);
 790                         ds->ds_feature_inuse[f] = B_FALSE;
 791                 }
 792         }
 793
 794         dsl_scan_ds_destroyed(ds, tx);
 795
 796         if (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0) {
 797                 /* This is a clone */
 798                 ASSERT(ds->ds_prev != NULL);
 799                 ASSERT3U(dsl_dataset_phys(ds->ds_prev)->ds_next_snap_obj, !=,
 800                     obj);
 801                 ASSERT0(dsl_dataset_phys(ds)->ds_next_snap_obj);
 802
 803                 dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
 804                 if (dsl_dataset_phys(ds->ds_prev)->ds_next_clones_obj != 0) {
 805                         dsl_dataset_remove_from_next_clones(ds->ds_prev,
 806                             obj, tx);
 807                 }
 808
 809                 ASSERT3U(dsl_dataset_phys(ds->ds_prev)->ds_num_children, >, 1);
 810                 dsl_dataset_phys(ds->ds_prev)->ds_num_children--;
 811         }
 812
 813         /*
 814          * Destroy the deadlist.  Unless it's a clone, the
 815          * deadlist should be empty.  (If it's a clone, it's
 816          * safe to ignore the deadlist contents.)
 817          */
 818         dsl_deadlist_close(&ds->ds_deadlist);
 819         dsl_deadlist_free(mos, dsl_dataset_phys(ds)->ds_deadlist_obj, tx);
 820         dmu_buf_will_dirty(ds->ds_dbuf, tx);
 821         dsl_dataset_phys(ds)->ds_deadlist_obj = 0;
 822
 823         objset_t *os;
 824         VERIFY0(dmu_objset_from_ds(ds, &os));
 825
 826         if (!spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_ASYNC_DESTROY)) {
 827                 old_synchronous_dataset_destroy(ds, tx);
 828         } else {
 829                 /*
 830                  * Move the bptree into the pool's list of trees to
 831                  * clean up and update space accounting information.
 832                  */
 833                 uint64_t used, comp, uncomp;
 834
 835                 zil_destroy_sync(dmu_objset_zil(os), tx);
 836
 837                 if (!spa_feature_is_active(dp->dp_spa,
 838                     SPA_FEATURE_ASYNC_DESTROY)) {
 839                         dsl_scan_t *scn = dp->dp_scan;
 840                         spa_feature_incr(dp->dp_spa, SPA_FEATURE_ASYNC_DESTROY,
 841                             tx);
 842                         dp->dp_bptree_obj = bptree_alloc(mos, tx);
 843                         VERIFY0(zap_add(mos,
 844                             DMU_POOL_DIRECTORY_OBJECT,
 845                             DMU_POOL_BPTREE_OBJ, sizeof (uint64_t), 1,
 846                             &dp->dp_bptree_obj, tx));
 847                         ASSERT(!scn->scn_async_destroying);
 848                         scn->scn_async_destroying = B_TRUE;
 849                 }
 850
 851                 used = dsl_dir_phys(ds->ds_dir)->dd_used_bytes;
 852                 comp = dsl_dir_phys(ds->ds_dir)->dd_compressed_bytes;
 853                 uncomp = dsl_dir_phys(ds->ds_dir)->dd_uncompressed_bytes;
 854
 855                 ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) ||
 856                     dsl_dataset_phys(ds)->ds_unique_bytes == used);
 857
 858                 rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
 859                 bptree_add(mos, dp->dp_bptree_obj,
 860                     &dsl_dataset_phys(ds)->ds_bp,
 861                     dsl_dataset_phys(ds)->ds_prev_snap_txg,
 862                     used, comp, uncomp, tx);
 863                 rrw_exit(&ds->ds_bp_rwlock, FTAG);
 864                 dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD,
 865                     -used, -comp, -uncomp, tx);
 866                 dsl_dir_diduse_space(dp->dp_free_dir, DD_USED_HEAD,
 867                     used, comp, uncomp, tx);
 868         }
 869
 870         if (ds->ds_prev != NULL) {
 871                 if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) {
 872                         VERIFY0(zap_remove_int(mos,
 873                             dsl_dir_phys(ds->ds_prev->ds_dir)->dd_clones,
 874                             ds->ds_object, tx));
 875                 }
 876                 prevobj = ds->ds_prev->ds_object;
 877                 dsl_dataset_rele(ds->ds_prev, ds);
 878                 ds->ds_prev = NULL;
 879         }
 880
 881         /*
 882          * This must be done after the dsl_traverse(), because it will
 883          * re-open the objset.
 884          */
 885         if (ds->ds_objset) {
 886                 dmu_objset_evict(ds->ds_objset);
 887                 ds->ds_objset = NULL;
 888         }
 889
 890         /* Erase the link in the dir */
 891         dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx);
 892         dsl_dir_phys(ds->ds_dir)->dd_head_dataset_obj = 0;
 893         ddobj = ds->ds_dir->dd_object;
 894         ASSERT(dsl_dataset_phys(ds)->ds_snapnames_zapobj != 0);
 895         VERIFY0(zap_destroy(mos,
 896             dsl_dataset_phys(ds)->ds_snapnames_zapobj, tx));
 897
 898         if (ds->ds_bookmarks != 0) {
 899                 VERIFY0(zap_destroy(mos, ds->ds_bookmarks, tx));
 900                 spa_feature_decr(dp->dp_spa, SPA_FEATURE_BOOKMARKS, tx);
 901         }
 902
 903         spa_prop_clear_bootfs(dp->dp_spa, ds->ds_object, tx);
 904
 905         ASSERT0(dsl_dataset_phys(ds)->ds_next_clones_obj);
 906         ASSERT0(dsl_dataset_phys(ds)->ds_props_obj);
 907         ASSERT0(dsl_dataset_phys(ds)->ds_userrefs_obj);
 908         dsl_dir_rele(ds->ds_dir, ds);
 909         ds->ds_dir = NULL;
 910         dmu_object_free_zapified(mos, obj, tx);
 911
 912         dsl_dir_destroy_sync(ddobj, tx);
 913
 914         if (rmorigin) {
 915                 dsl_dataset_t *prev;
 916                 VERIFY0(dsl_dataset_hold_obj(dp, prevobj, FTAG, &prev));
 917                 dsl_destroy_snapshot_sync_impl(prev, B_FALSE, tx);
 918                 dsl_dataset_rele(prev, FTAG);
 919         }
 920 }
 921
 922 void
 923 dsl_destroy_head_sync(void *arg, dmu_tx_t *tx)
 924 {
 925         dsl_destroy_head_arg_t *ddha = arg;
 926         dsl_pool_t *dp = dmu_tx_pool(tx);
 927         dsl_dataset_t *ds;
 928
 929         VERIFY0(dsl_dataset_hold(dp, ddha->ddha_name, FTAG, &ds));
 930         dsl_destroy_head_sync_impl(ds, tx);
 931         dsl_dataset_rele(ds, FTAG);
 932 }
 933
 934 static void
 935 dsl_destroy_head_begin_sync(void *arg, dmu_tx_t *tx)
 936 {
 937         dsl_destroy_head_arg_t *ddha = arg;
 938         dsl_pool_t *dp = dmu_tx_pool(tx);
 939         dsl_dataset_t *ds;
 940
 941         VERIFY0(dsl_dataset_hold(dp, ddha->ddha_name, FTAG, &ds));
 942
 943         /* Mark it as inconsistent on-disk, in case we crash */
 944         dmu_buf_will_dirty(ds->ds_dbuf, tx);
 945         dsl_dataset_phys(ds)->ds_flags |= DS_FLAG_INCONSISTENT;
 946
 947         spa_history_log_internal_ds(ds, "destroy begin", tx, "");
 948         dsl_dataset_rele(ds, FTAG);
 949 }
 950
 951 int
 952 dsl_destroy_head(const char *name)
 953 {
 954         dsl_destroy_head_arg_t ddha;
 955         int error;
 956         spa_t *spa;
 957         boolean_t isenabled;
 958
 959 #ifdef _KERNEL
 960         zfs_destroy_unmount_origin(name);
 961 #endif
 962
 963         error = spa_open(name, &spa, FTAG);
 964         if (error != 0)
 965                 return (error);
 966         isenabled = spa_feature_is_enabled(spa, SPA_FEATURE_ASYNC_DESTROY);
 967         spa_close(spa, FTAG);
 968
 969         ddha.ddha_name = name;
 970
 971         if (!isenabled) {
 972                 objset_t *os;
 973
 974                 error = dsl_sync_task(name, dsl_destroy_head_check,
 975                     dsl_destroy_head_begin_sync, &ddha,
 976                     0, ZFS_SPACE_CHECK_NONE);
 977                 if (error != 0)
 978                         return (error);
 979
 980                 /*
 981                  * Head deletion is processed in one txg on old pools;
 982                  * remove the objects from open context so that the txg sync
 983                  * is not too long.
 984                  */
 985                 error = dmu_objset_own(name, DMU_OST_ANY, B_FALSE, FTAG, &os);
 986                 if (error == 0) {
 987                         uint64_t prev_snap_txg =
 988                             dsl_dataset_phys(dmu_objset_ds(os))->
 989                             ds_prev_snap_txg;
 990                         for (uint64_t obj = 0; error == 0;
 991                             error = dmu_object_next(os, &obj, FALSE,
 992                             prev_snap_txg))
 993                                 (void) dmu_free_long_object(os, obj);
 994                         /* sync out all frees */
 995                         txg_wait_synced(dmu_objset_pool(os), 0);
 996                         dmu_objset_disown(os, FTAG);
 997                 }
 998         }
 999
1000         return (dsl_sync_task(name, dsl_destroy_head_check,
1001             dsl_destroy_head_sync, &ddha, 0, ZFS_SPACE_CHECK_NONE));
1002 }
1003
1004 /*
1005  * Note, this function is used as the callback for dmu_objset_find().  We
1006  * always return 0 so that we will continue to find and process
1007  * inconsistent datasets, even if we encounter an error trying to
1008  * process one of them.
1009  */
1010 /* ARGSUSED */
1011 int
1012 dsl_destroy_inconsistent(const char *dsname, void *arg)
1013 {
1014         objset_t *os;
1015
1016         if (dmu_objset_hold(dsname, FTAG, &os) == 0) {
1017                 boolean_t need_destroy = DS_IS_INCONSISTENT(dmu_objset_ds(os));
1018
1019                 /*
1020                  * If the dataset is inconsistent because a resumable receive
1021                  * has failed, then do not destroy it.
1022                  */
1023                 if (dsl_dataset_has_resume_receive_state(dmu_objset_ds(os)))
1024                         need_destroy = B_FALSE;
1025
1026                 dmu_objset_rele(os, FTAG);
1027                 if (need_destroy)
1028                         (void) dsl_destroy_head(dsname);
1029         }
1030         return (0);
1031 }