/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
 * Copyright (c) 2013 Steven Hartland. All rights reserved.
 * Copyright (c) 2013 by Joyent, Inc. All rights reserved.
 */
#include <sys/zfs_context.h>
#include <sys/dsl_userhold.h>
#include <sys/dsl_dataset.h>
#include <sys/dsl_synctask.h>
#include <sys/dmu_tx.h>
#include <sys/dsl_pool.h>
#include <sys/dsl_dir.h>
#include <sys/dmu_traverse.h>
#include <sys/dsl_scan.h>
#include <sys/dmu_objset.h>
#include <sys/zap.h>
#include <sys/zfeature.h>
#include <sys/zfs_ioctl.h>
#include <sys/dsl_deleg.h>
#include <sys/dmu_impl.h>
typedef struct dmu_snapshots_destroy_arg {
	nvlist_t *dsda_snaps;
	nvlist_t *dsda_successful_snaps;
	boolean_t dsda_defer;
	nvlist_t *dsda_errlist;
} dmu_snapshots_destroy_arg_t;
int
dsl_destroy_snapshot_check_impl(dsl_dataset_t *ds, boolean_t defer)
{
	if (!ds->ds_is_snapshot)
		return (SET_ERROR(EINVAL));

	if (dsl_dataset_long_held(ds))
		return (SET_ERROR(EBUSY));

	/*
	 * Only allow deferred destroy on pools that support it.
	 * NOTE: deferred destroy is only supported on snapshots.
	 */
	if (defer) {
		if (spa_version(ds->ds_dir->dd_pool->dp_spa) <
		    SPA_VERSION_USERREFS)
			return (SET_ERROR(ENOTSUP));
		return (0);
	}

	/*
	 * If this snapshot has an elevated user reference count,
	 * we can't destroy it yet.
	 */
	if (ds->ds_userrefs > 0)
		return (SET_ERROR(EBUSY));

	/*
	 * Can't delete a branch point.
	 */
	if (dsl_dataset_phys(ds)->ds_num_children > 1)
		return (SET_ERROR(EEXIST));

	return (0);
}
static int
dsl_destroy_snapshot_check(void *arg, dmu_tx_t *tx)
{
	dmu_snapshots_destroy_arg_t *dsda = arg;
	dsl_pool_t *dp = dmu_tx_pool(tx);
	nvpair_t *pair;
	int error = 0;

	if (!dmu_tx_is_syncing(tx))
		return (0);

	for (pair = nvlist_next_nvpair(dsda->dsda_snaps, NULL);
	    pair != NULL; pair = nvlist_next_nvpair(dsda->dsda_snaps, pair)) {
		dsl_dataset_t *ds;

		error = dsl_dataset_hold(dp, nvpair_name(pair),
		    FTAG, &ds);

		/*
		 * If the snapshot does not exist, silently ignore it
		 * (it's "already destroyed").
		 */
		if (error == ENOENT)
			continue;

		if (error == 0) {
			error = dsl_destroy_snapshot_check_impl(ds,
			    dsda->dsda_defer);
			dsl_dataset_rele(ds, FTAG);
		}

		if (error == 0) {
			fnvlist_add_boolean(dsda->dsda_successful_snaps,
			    nvpair_name(pair));
		} else {
			fnvlist_add_int32(dsda->dsda_errlist,
			    nvpair_name(pair), error);
		}
	}

	pair = nvlist_next_nvpair(dsda->dsda_errlist, NULL);
	if (pair != NULL)
		return (fnvpair_value_int32(pair));

	return (0);
}
struct process_old_arg {
	dsl_dataset_t *ds;
	dsl_dataset_t *ds_prev;
	boolean_t after_branch_point;
	zio_t *pio;
	uint64_t used, comp, uncomp;
};

static int
process_old_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
{
	struct process_old_arg *poa = arg;
	dsl_pool_t *dp = poa->ds->ds_dir->dd_pool;

	ASSERT(!BP_IS_HOLE(bp));

	if (bp->blk_birth <= dsl_dataset_phys(poa->ds)->ds_prev_snap_txg) {
		dsl_deadlist_insert(&poa->ds->ds_deadlist, bp, tx);
		if (poa->ds_prev && !poa->after_branch_point &&
		    bp->blk_birth >
		    dsl_dataset_phys(poa->ds_prev)->ds_prev_snap_txg) {
			dsl_dataset_phys(poa->ds_prev)->ds_unique_bytes +=
			    bp_get_dsize_sync(dp->dp_spa, bp);
		}
	} else {
		poa->used += bp_get_dsize_sync(dp->dp_spa, bp);
		poa->comp += BP_GET_PSIZE(bp);
		poa->uncomp += BP_GET_UCSIZE(bp);
		dsl_free_sync(poa->pio, dp, tx->tx_txg, bp);
	}

	return (0);
}
static void
process_old_deadlist(dsl_dataset_t *ds, dsl_dataset_t *ds_prev,
    dsl_dataset_t *ds_next, boolean_t after_branch_point, dmu_tx_t *tx)
{
	struct process_old_arg poa = { 0 };
	dsl_pool_t *dp = ds->ds_dir->dd_pool;
	objset_t *mos = dp->dp_meta_objset;
	uint64_t deadlist_obj;

	ASSERT(ds->ds_deadlist.dl_oldfmt);
	ASSERT(ds_next->ds_deadlist.dl_oldfmt);

	poa.ds = ds;
	poa.ds_prev = ds_prev;
	poa.after_branch_point = after_branch_point;
	poa.pio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
	VERIFY0(bpobj_iterate(&ds_next->ds_deadlist.dl_bpobj,
	    process_old_cb, &poa, tx));
	VERIFY0(zio_wait(poa.pio));
	ASSERT3U(poa.used, ==, dsl_dataset_phys(ds)->ds_unique_bytes);

	/* change snapused */
	dsl_dir_diduse_space(ds->ds_dir, DD_USED_SNAP,
	    -poa.used, -poa.comp, -poa.uncomp, tx);

	/* swap next's deadlist to our deadlist */
	dsl_deadlist_close(&ds->ds_deadlist);
	dsl_deadlist_close(&ds_next->ds_deadlist);
	deadlist_obj = dsl_dataset_phys(ds)->ds_deadlist_obj;
	dsl_dataset_phys(ds)->ds_deadlist_obj =
	    dsl_dataset_phys(ds_next)->ds_deadlist_obj;
	dsl_dataset_phys(ds_next)->ds_deadlist_obj = deadlist_obj;
	dsl_deadlist_open(&ds->ds_deadlist, mos,
	    dsl_dataset_phys(ds)->ds_deadlist_obj);
	dsl_deadlist_open(&ds_next->ds_deadlist, mos,
	    dsl_dataset_phys(ds_next)->ds_deadlist_obj);
}
static void
dsl_dataset_remove_clones_key(dsl_dataset_t *ds, uint64_t mintxg, dmu_tx_t *tx)
{
	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
	zap_cursor_t zc;
	zap_attribute_t za;

	/*
	 * If it is the old version, dd_clones doesn't exist so we can't
	 * find the clones, but dsl_deadlist_remove_key() is a no-op so it
	 * doesn't matter.
	 */
	if (dsl_dir_phys(ds->ds_dir)->dd_clones == 0)
		return;

	for (zap_cursor_init(&zc, mos, dsl_dir_phys(ds->ds_dir)->dd_clones);
	    zap_cursor_retrieve(&zc, &za) == 0;
	    zap_cursor_advance(&zc)) {
		dsl_dataset_t *clone;

		VERIFY0(dsl_dataset_hold_obj(ds->ds_dir->dd_pool,
		    za.za_first_integer, FTAG, &clone));
		if (clone->ds_dir->dd_origin_txg > mintxg) {
			dsl_deadlist_remove_key(&clone->ds_deadlist,
			    mintxg, tx);
			dsl_dataset_remove_clones_key(clone, mintxg, tx);
		}
		dsl_dataset_rele(clone, FTAG);
	}
	zap_cursor_fini(&zc);
}
void
dsl_destroy_snapshot_sync_impl(dsl_dataset_t *ds, boolean_t defer, dmu_tx_t *tx)
{
	int after_branch_point = FALSE;
	dsl_pool_t *dp = ds->ds_dir->dd_pool;
	objset_t *mos = dp->dp_meta_objset;
	dsl_dataset_t *ds_prev = NULL;
	uint64_t obj;

	ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock));
	ASSERT3U(dsl_dataset_phys(ds)->ds_bp.blk_birth, <=, tx->tx_txg);
	ASSERT(refcount_is_zero(&ds->ds_longholds));

	if (defer &&
	    (ds->ds_userrefs > 0 ||
	    dsl_dataset_phys(ds)->ds_num_children > 1)) {
		ASSERT(spa_version(dp->dp_spa) >= SPA_VERSION_USERREFS);
		dmu_buf_will_dirty(ds->ds_dbuf, tx);
		dsl_dataset_phys(ds)->ds_flags |= DS_FLAG_DEFER_DESTROY;
		spa_history_log_internal_ds(ds, "defer_destroy", tx, "");
		return;
	}

	ASSERT3U(dsl_dataset_phys(ds)->ds_num_children, <=, 1);

	/* We need to log before removing it from the namespace. */
	spa_history_log_internal_ds(ds, "destroy", tx, "");

	dsl_scan_ds_destroyed(ds, tx);

	obj = ds->ds_object;

	if (ds->ds_large_blocks) {
		ASSERT0(zap_contains(mos, obj, DS_FIELD_LARGE_BLOCKS));
		spa_feature_decr(dp->dp_spa, SPA_FEATURE_LARGE_BLOCKS, tx);
	}
	if (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0) {
		ASSERT3P(ds->ds_prev, ==, NULL);
		VERIFY0(dsl_dataset_hold_obj(dp,
		    dsl_dataset_phys(ds)->ds_prev_snap_obj, FTAG, &ds_prev));
		after_branch_point =
		    (dsl_dataset_phys(ds_prev)->ds_next_snap_obj != obj);

		dmu_buf_will_dirty(ds_prev->ds_dbuf, tx);
		if (after_branch_point &&
		    dsl_dataset_phys(ds_prev)->ds_next_clones_obj != 0) {
			dsl_dataset_remove_from_next_clones(ds_prev, obj, tx);
			if (dsl_dataset_phys(ds)->ds_next_snap_obj != 0) {
				VERIFY0(zap_add_int(mos,
				    dsl_dataset_phys(ds_prev)->
				    ds_next_clones_obj,
				    dsl_dataset_phys(ds)->ds_next_snap_obj,
				    tx));
			}
		}
		if (!after_branch_point) {
			dsl_dataset_phys(ds_prev)->ds_next_snap_obj =
			    dsl_dataset_phys(ds)->ds_next_snap_obj;
		}
	}

	dsl_dataset_t *ds_next;
	uint64_t old_unique;
	uint64_t used = 0, comp = 0, uncomp = 0;

	VERIFY0(dsl_dataset_hold_obj(dp,
	    dsl_dataset_phys(ds)->ds_next_snap_obj, FTAG, &ds_next));
	ASSERT3U(dsl_dataset_phys(ds_next)->ds_prev_snap_obj, ==, obj);

	old_unique = dsl_dataset_phys(ds_next)->ds_unique_bytes;

	dmu_buf_will_dirty(ds_next->ds_dbuf, tx);
	dsl_dataset_phys(ds_next)->ds_prev_snap_obj =
	    dsl_dataset_phys(ds)->ds_prev_snap_obj;
	dsl_dataset_phys(ds_next)->ds_prev_snap_txg =
	    dsl_dataset_phys(ds)->ds_prev_snap_txg;
	ASSERT3U(dsl_dataset_phys(ds)->ds_prev_snap_txg, ==,
	    ds_prev ? dsl_dataset_phys(ds_prev)->ds_creation_txg : 0);

	if (ds_next->ds_deadlist.dl_oldfmt) {
		process_old_deadlist(ds, ds_prev, ds_next,
		    after_branch_point, tx);
	} else {
		/* Adjust prev's unique space. */
		if (ds_prev && !after_branch_point) {
			dsl_deadlist_space_range(&ds_next->ds_deadlist,
			    dsl_dataset_phys(ds_prev)->ds_prev_snap_txg,
			    dsl_dataset_phys(ds)->ds_prev_snap_txg,
			    &used, &comp, &uncomp);
			dsl_dataset_phys(ds_prev)->ds_unique_bytes += used;
		}

		/* Adjust snapused. */
		dsl_deadlist_space_range(&ds_next->ds_deadlist,
		    dsl_dataset_phys(ds)->ds_prev_snap_txg, UINT64_MAX,
		    &used, &comp, &uncomp);
		dsl_dir_diduse_space(ds->ds_dir, DD_USED_SNAP,
		    -used, -comp, -uncomp, tx);

		/* Move blocks to be freed to pool's free list. */
		dsl_deadlist_move_bpobj(&ds_next->ds_deadlist,
		    &dp->dp_free_bpobj, dsl_dataset_phys(ds)->ds_prev_snap_txg,
		    tx);
		dsl_dir_diduse_space(tx->tx_pool->dp_free_dir,
		    DD_USED_HEAD, used, comp, uncomp, tx);

		/* Merge our deadlist into next's and free it. */
		dsl_deadlist_merge(&ds_next->ds_deadlist,
		    dsl_dataset_phys(ds)->ds_deadlist_obj, tx);
	}
	dsl_deadlist_close(&ds->ds_deadlist);
	dsl_deadlist_free(mos, dsl_dataset_phys(ds)->ds_deadlist_obj, tx);
	dmu_buf_will_dirty(ds->ds_dbuf, tx);
	dsl_dataset_phys(ds)->ds_deadlist_obj = 0;

	/* Collapse range in clone heads */
	dsl_dataset_remove_clones_key(ds,
	    dsl_dataset_phys(ds)->ds_creation_txg, tx);

	if (ds_next->ds_is_snapshot) {
		dsl_dataset_t *ds_nextnext;

		/*
		 * Update next's unique to include blocks which
		 * were previously shared by only this snapshot
		 * and it.  Those blocks will be born after the
		 * prev snap and before this snap, and will have
		 * died after the next snap and before the one
		 * after that (ie. be on the snap after next's
		 * deadlist).
		 */
		VERIFY0(dsl_dataset_hold_obj(dp,
		    dsl_dataset_phys(ds_next)->ds_next_snap_obj,
		    FTAG, &ds_nextnext));
		dsl_deadlist_space_range(&ds_nextnext->ds_deadlist,
		    dsl_dataset_phys(ds)->ds_prev_snap_txg,
		    dsl_dataset_phys(ds)->ds_creation_txg,
		    &used, &comp, &uncomp);
		dsl_dataset_phys(ds_next)->ds_unique_bytes += used;
		dsl_dataset_rele(ds_nextnext, FTAG);
		ASSERT3P(ds_next->ds_prev, ==, NULL);

		/* Collapse range in this head. */
		dsl_dataset_t *hds;
		VERIFY0(dsl_dataset_hold_obj(dp,
		    dsl_dir_phys(ds->ds_dir)->dd_head_dataset_obj, FTAG, &hds));
		dsl_deadlist_remove_key(&hds->ds_deadlist,
		    dsl_dataset_phys(ds)->ds_creation_txg, tx);
		dsl_dataset_rele(hds, FTAG);
	} else {
		ASSERT3P(ds_next->ds_prev, ==, ds);
		dsl_dataset_rele(ds_next->ds_prev, ds_next);
		ds_next->ds_prev = NULL;
		if (ds_prev) {
			VERIFY0(dsl_dataset_hold_obj(dp,
			    dsl_dataset_phys(ds)->ds_prev_snap_obj,
			    ds_next, &ds_next->ds_prev));
		}

		dsl_dataset_recalc_head_uniq(ds_next);

		/*
		 * Reduce the amount of our unconsumed refreservation
		 * being charged to our parent by the amount of
		 * new unique data we have gained.
		 */
		if (old_unique < ds_next->ds_reserved) {
			int64_t mrsdelta;
			uint64_t new_unique =
			    dsl_dataset_phys(ds_next)->ds_unique_bytes;

			ASSERT(old_unique <= new_unique);
			mrsdelta = MIN(new_unique - old_unique,
			    ds_next->ds_reserved - old_unique);
			dsl_dir_diduse_space(ds->ds_dir,
			    DD_USED_REFRSRV, -mrsdelta, 0, 0, tx);
		}
	}
	dsl_dataset_rele(ds_next, FTAG);

	/*
	 * This must be done after the dsl_traverse(), because it will
	 * re-open the objset.
	 */
	if (ds->ds_objset) {
		dmu_objset_evict(ds->ds_objset);
		ds->ds_objset = NULL;
	}

	/* remove from snapshot namespace */
	dsl_dataset_t *ds_head;
	ASSERT(dsl_dataset_phys(ds)->ds_snapnames_zapobj == 0);
	VERIFY0(dsl_dataset_hold_obj(dp,
	    dsl_dir_phys(ds->ds_dir)->dd_head_dataset_obj, FTAG, &ds_head));
	VERIFY0(dsl_dataset_get_snapname(ds));
#ifdef ZFS_DEBUG
	{
		uint64_t val;
		int err;

		err = dsl_dataset_snap_lookup(ds_head,
		    ds->ds_snapname, &val);
		ASSERT0(err);
		ASSERT3U(val, ==, obj);
	}
#endif
	VERIFY0(dsl_dataset_snap_remove(ds_head, ds->ds_snapname, tx, B_TRUE));
	dsl_dataset_rele(ds_head, FTAG);

	if (ds_prev != NULL)
		dsl_dataset_rele(ds_prev, FTAG);

	spa_prop_clear_bootfs(dp->dp_spa, ds->ds_object, tx);

	if (dsl_dataset_phys(ds)->ds_next_clones_obj != 0) {
		uint64_t count;
		ASSERT0(zap_count(mos,
		    dsl_dataset_phys(ds)->ds_next_clones_obj, &count) &&
		    count == 0);
		VERIFY0(dmu_object_free(mos,
		    dsl_dataset_phys(ds)->ds_next_clones_obj, tx));
	}
	if (dsl_dataset_phys(ds)->ds_props_obj != 0)
		VERIFY0(zap_destroy(mos, dsl_dataset_phys(ds)->ds_props_obj,
		    tx));
	if (dsl_dataset_phys(ds)->ds_userrefs_obj != 0)
		VERIFY0(zap_destroy(mos, dsl_dataset_phys(ds)->ds_userrefs_obj,
		    tx));
	dsl_dir_rele(ds->ds_dir, ds);
	ds->ds_dir = NULL;
	dmu_object_free_zapified(mos, obj, tx);
}
static void
dsl_destroy_snapshot_sync(void *arg, dmu_tx_t *tx)
{
	dmu_snapshots_destroy_arg_t *dsda = arg;
	dsl_pool_t *dp = dmu_tx_pool(tx);
	nvpair_t *pair;

	for (pair = nvlist_next_nvpair(dsda->dsda_successful_snaps, NULL);
	    pair != NULL;
	    pair = nvlist_next_nvpair(dsda->dsda_successful_snaps, pair)) {
		dsl_dataset_t *ds;

		VERIFY0(dsl_dataset_hold(dp, nvpair_name(pair), FTAG, &ds));

		dsl_destroy_snapshot_sync_impl(ds, dsda->dsda_defer, tx);
		dsl_dataset_rele(ds, FTAG);
	}
}
/*
 * The semantics of this function are described in the comment above
 * lzc_destroy_snaps().  To summarize:
 *
 * The snapshots must all be in the same pool.
 *
 * Snapshots that don't exist will be silently ignored (considered to be
 * "already deleted").
 *
 * On success, all snaps will be destroyed and this will return 0.
 * On failure, no snaps will be destroyed, the errlist will be filled in,
 * and this will return an errno.
 */
int
dsl_destroy_snapshots_nvl(nvlist_t *snaps, boolean_t defer,
    nvlist_t *errlist)
{
	dmu_snapshots_destroy_arg_t dsda;
	int error;
	nvpair_t *pair;

	pair = nvlist_next_nvpair(snaps, NULL);
	if (pair == NULL)
		return (0);

	dsda.dsda_snaps = snaps;
	dsda.dsda_successful_snaps = fnvlist_alloc();
	dsda.dsda_defer = defer;
	dsda.dsda_errlist = errlist;

	error = dsl_sync_task(nvpair_name(pair),
	    dsl_destroy_snapshot_check, dsl_destroy_snapshot_sync,
	    &dsda, 0, ZFS_SPACE_CHECK_NONE);
	fnvlist_free(dsda.dsda_successful_snaps);

	return (error);
}
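
/*
 * Illustrative usage (a sketch; the snapshot names below are hypothetical).
 * A caller builds an nvlist of full snapshot names, as dsl_destroy_snapshot()
 * below does for a single name, and collects per-snapshot errors in errlist:
 *
 *	nvlist_t *snaps = fnvlist_alloc();
 *	nvlist_t *errors = fnvlist_alloc();
 *	fnvlist_add_boolean(snaps, "pool/fs@snap1");
 *	fnvlist_add_boolean(snaps, "pool/fs@snap2");
 *	int err = dsl_destroy_snapshots_nvl(snaps, B_FALSE, errors);
 *	// on failure, errors maps each offending snapshot name to an errno
 *	fnvlist_free(errors);
 *	fnvlist_free(snaps);
 */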
int
dsl_destroy_snapshot(const char *name, boolean_t defer)
{
	int error;
	nvlist_t *nvl = fnvlist_alloc();
	nvlist_t *errlist = fnvlist_alloc();

	fnvlist_add_boolean(nvl, name);
	error = dsl_destroy_snapshots_nvl(nvl, defer, errlist);
	fnvlist_free(errlist);
	fnvlist_free(nvl);
	return (error);
}
struct killarg {
	dsl_dataset_t *ds;
	dmu_tx_t *tx;
};

/* ARGSUSED */
static int
kill_blkptr(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
    const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg)
{
	struct killarg *ka = arg;
	dmu_tx_t *tx = ka->tx;

	if (bp == NULL || BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp))
		return (0);

	if (zb->zb_level == ZB_ZIL_LEVEL) {
		ASSERT(zilog != NULL);
		/*
		 * It's a block in the intent log.  It has no
		 * accounting, so just free it.
		 */
		dsl_free(ka->tx->tx_pool, ka->tx->tx_txg, bp);
	} else {
		ASSERT(zilog == NULL);
		ASSERT3U(bp->blk_birth, >,
		    dsl_dataset_phys(ka->ds)->ds_prev_snap_txg);
		(void) dsl_dataset_block_kill(ka->ds, bp, tx, B_FALSE);
	}

	return (0);
}
static void
old_synchronous_dataset_destroy(dsl_dataset_t *ds, dmu_tx_t *tx)
{
	struct killarg ka;

	/*
	 * Free everything that we point to (that's born after
	 * the previous snapshot, if we are a clone)
	 *
	 * NB: this should be very quick, because we already
	 * freed all the objects in open context.
	 */
	ka.ds = ds;
	ka.tx = tx;
	VERIFY0(traverse_dataset(ds,
	    dsl_dataset_phys(ds)->ds_prev_snap_txg, TRAVERSE_POST,
	    kill_blkptr, &ka));
	ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) ||
	    dsl_dataset_phys(ds)->ds_unique_bytes == 0);
}
typedef struct dsl_destroy_head_arg {
	const char *ddha_name;
} dsl_destroy_head_arg_t;
int
dsl_destroy_head_check_impl(dsl_dataset_t *ds, int expected_holds)
{
	int error;
	uint64_t count;
	objset_t *mos;

	ASSERT(!ds->ds_is_snapshot);
	if (ds->ds_is_snapshot)
		return (SET_ERROR(EINVAL));

	if (refcount_count(&ds->ds_longholds) != expected_holds)
		return (SET_ERROR(EBUSY));

	mos = ds->ds_dir->dd_pool->dp_meta_objset;

	/*
	 * Can't delete a head dataset if there are snapshots of it.
	 * (Except if the only snapshots are from the branch we cloned
	 * from.)
	 */
	if (ds->ds_prev != NULL &&
	    dsl_dataset_phys(ds->ds_prev)->ds_next_snap_obj == ds->ds_object)
		return (SET_ERROR(EBUSY));

	/*
	 * Can't delete if there are children of this fs.
	 */
	error = zap_count(mos,
	    dsl_dir_phys(ds->ds_dir)->dd_child_dir_zapobj, &count);
	if (error != 0)
		return (error);
	if (count != 0)
		return (SET_ERROR(EEXIST));

	if (dsl_dir_is_clone(ds->ds_dir) && DS_IS_DEFER_DESTROY(ds->ds_prev) &&
	    dsl_dataset_phys(ds->ds_prev)->ds_num_children == 2 &&
	    ds->ds_prev->ds_userrefs == 0) {
		/* We need to remove the origin snapshot as well. */
		if (!refcount_is_zero(&ds->ds_prev->ds_longholds))
			return (SET_ERROR(EBUSY));
	}
	return (0);
}
static int
dsl_destroy_head_check(void *arg, dmu_tx_t *tx)
{
	dsl_destroy_head_arg_t *ddha = arg;
	dsl_pool_t *dp = dmu_tx_pool(tx);
	dsl_dataset_t *ds;
	int error;

	error = dsl_dataset_hold(dp, ddha->ddha_name, FTAG, &ds);
	if (error != 0)
		return (error);

	error = dsl_destroy_head_check_impl(ds, 0);
	dsl_dataset_rele(ds, FTAG);
	return (error);
}
static void
dsl_dir_destroy_sync(uint64_t ddobj, dmu_tx_t *tx)
{
	dsl_dir_t *dd;
	dsl_pool_t *dp = dmu_tx_pool(tx);
	objset_t *mos = dp->dp_meta_objset;
	dd_used_t t;

	ASSERT(RRW_WRITE_HELD(&dmu_tx_pool(tx)->dp_config_rwlock));

	VERIFY0(dsl_dir_hold_obj(dp, ddobj, NULL, FTAG, &dd));

	ASSERT0(dsl_dir_phys(dd)->dd_head_dataset_obj);

	/*
	 * Decrement the filesystem count for all parent filesystems.
	 *
	 * When we receive an incremental stream into a filesystem that already
	 * exists, a temporary clone is created.  We never count this temporary
	 * clone, whose name begins with a '%'.
	 */
	if (dd->dd_myname[0] != '%' && dd->dd_parent != NULL)
		dsl_fs_ss_count_adjust(dd->dd_parent, -1,
		    DD_FIELD_FILESYSTEM_COUNT, tx);

	/*
	 * Remove our reservation. The impl() routine avoids setting the
	 * actual property, which would require the (already destroyed) ds.
	 */
	dsl_dir_set_reservation_sync_impl(dd, 0, tx);

	ASSERT0(dsl_dir_phys(dd)->dd_used_bytes);
	ASSERT0(dsl_dir_phys(dd)->dd_reserved);
	for (t = 0; t < DD_USED_NUM; t++)
		ASSERT0(dsl_dir_phys(dd)->dd_used_breakdown[t]);

	VERIFY0(zap_destroy(mos, dsl_dir_phys(dd)->dd_child_dir_zapobj, tx));
	VERIFY0(zap_destroy(mos, dsl_dir_phys(dd)->dd_props_zapobj, tx));
	VERIFY0(dsl_deleg_destroy(mos, dsl_dir_phys(dd)->dd_deleg_zapobj, tx));
	VERIFY0(zap_remove(mos,
	    dsl_dir_phys(dd->dd_parent)->dd_child_dir_zapobj,
	    dd->dd_myname, tx));

	dsl_dir_rele(dd, FTAG);
	dmu_object_free_zapified(mos, ddobj, tx);
}
void
dsl_destroy_head_sync_impl(dsl_dataset_t *ds, dmu_tx_t *tx)
{
	dsl_pool_t *dp = dmu_tx_pool(tx);
	objset_t *mos = dp->dp_meta_objset;
	uint64_t obj, ddobj, prevobj = 0;
	boolean_t rmorigin;
	objset_t *os;

	ASSERT3U(dsl_dataset_phys(ds)->ds_num_children, <=, 1);
	ASSERT(ds->ds_prev == NULL ||
	    dsl_dataset_phys(ds->ds_prev)->ds_next_snap_obj != ds->ds_object);
	ASSERT3U(dsl_dataset_phys(ds)->ds_bp.blk_birth, <=, tx->tx_txg);
	ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock));

	/* We need to log before removing it from the namespace. */
	spa_history_log_internal_ds(ds, "destroy", tx, "");

	rmorigin = (dsl_dir_is_clone(ds->ds_dir) &&
	    DS_IS_DEFER_DESTROY(ds->ds_prev) &&
	    dsl_dataset_phys(ds->ds_prev)->ds_num_children == 2 &&
	    ds->ds_prev->ds_userrefs == 0);

	/* Remove our reservation. */
	if (ds->ds_reserved != 0) {
		dsl_dataset_set_refreservation_sync_impl(ds,
		    (ZPROP_SRC_NONE | ZPROP_SRC_LOCAL | ZPROP_SRC_RECEIVED),
		    0, tx);
		ASSERT0(ds->ds_reserved);
	}

	if (ds->ds_large_blocks)
		spa_feature_decr(dp->dp_spa, SPA_FEATURE_LARGE_BLOCKS, tx);

	dsl_scan_ds_destroyed(ds, tx);

	obj = ds->ds_object;

	if (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0) {
		/* This is a clone */
		ASSERT(ds->ds_prev != NULL);
		ASSERT3U(dsl_dataset_phys(ds->ds_prev)->ds_next_snap_obj, !=,
		    obj);
		ASSERT0(dsl_dataset_phys(ds)->ds_next_snap_obj);

		dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
		if (dsl_dataset_phys(ds->ds_prev)->ds_next_clones_obj != 0) {
			dsl_dataset_remove_from_next_clones(ds->ds_prev,
			    obj, tx);
		}

		ASSERT3U(dsl_dataset_phys(ds->ds_prev)->ds_num_children, >, 1);
		dsl_dataset_phys(ds->ds_prev)->ds_num_children--;
	}

	/*
	 * Destroy the deadlist.  Unless it's a clone, the
	 * deadlist should be empty.  (If it's a clone, it's
	 * safe to ignore the deadlist contents.)
	 */
	dsl_deadlist_close(&ds->ds_deadlist);
	dsl_deadlist_free(mos, dsl_dataset_phys(ds)->ds_deadlist_obj, tx);
	dmu_buf_will_dirty(ds->ds_dbuf, tx);
	dsl_dataset_phys(ds)->ds_deadlist_obj = 0;

	VERIFY0(dmu_objset_from_ds(ds, &os));

	if (!spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_ASYNC_DESTROY)) {
		old_synchronous_dataset_destroy(ds, tx);
	} else {
		/*
		 * Move the bptree into the pool's list of trees to
		 * clean up and update space accounting information.
		 */
		uint64_t used, comp, uncomp;

		zil_destroy_sync(dmu_objset_zil(os), tx);

		if (!spa_feature_is_active(dp->dp_spa,
		    SPA_FEATURE_ASYNC_DESTROY)) {
			dsl_scan_t *scn = dp->dp_scan;
			spa_feature_incr(dp->dp_spa, SPA_FEATURE_ASYNC_DESTROY,
			    tx);
			dp->dp_bptree_obj = bptree_alloc(mos, tx);
			VERIFY0(zap_add(mos,
			    DMU_POOL_DIRECTORY_OBJECT,
			    DMU_POOL_BPTREE_OBJ, sizeof (uint64_t), 1,
			    &dp->dp_bptree_obj, tx));
			ASSERT(!scn->scn_async_destroying);
			scn->scn_async_destroying = B_TRUE;
		}

		used = dsl_dir_phys(ds->ds_dir)->dd_used_bytes;
		comp = dsl_dir_phys(ds->ds_dir)->dd_compressed_bytes;
		uncomp = dsl_dir_phys(ds->ds_dir)->dd_uncompressed_bytes;

		ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) ||
		    dsl_dataset_phys(ds)->ds_unique_bytes == used);

		bptree_add(mos, dp->dp_bptree_obj,
		    &dsl_dataset_phys(ds)->ds_bp,
		    dsl_dataset_phys(ds)->ds_prev_snap_txg,
		    used, comp, uncomp, tx);
		dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD,
		    -used, -comp, -uncomp, tx);
		dsl_dir_diduse_space(dp->dp_free_dir, DD_USED_HEAD,
		    used, comp, uncomp, tx);
	}

	if (ds->ds_prev != NULL) {
		if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) {
			VERIFY0(zap_remove_int(mos,
			    dsl_dir_phys(ds->ds_prev->ds_dir)->dd_clones,
			    ds->ds_object, tx));
		}
		prevobj = ds->ds_prev->ds_object;
		dsl_dataset_rele(ds->ds_prev, ds);
		ds->ds_prev = NULL;
	}

	/*
	 * This must be done after the dsl_traverse(), because it will
	 * re-open the objset.
	 */
	if (ds->ds_objset) {
		dmu_objset_evict(ds->ds_objset);
		ds->ds_objset = NULL;
	}

	/* Erase the link in the dir */
	dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx);
	dsl_dir_phys(ds->ds_dir)->dd_head_dataset_obj = 0;
	ddobj = ds->ds_dir->dd_object;
	ASSERT(dsl_dataset_phys(ds)->ds_snapnames_zapobj != 0);
	VERIFY0(zap_destroy(mos,
	    dsl_dataset_phys(ds)->ds_snapnames_zapobj, tx));

	if (ds->ds_bookmarks != 0) {
		VERIFY0(zap_destroy(mos, ds->ds_bookmarks, tx));
		spa_feature_decr(dp->dp_spa, SPA_FEATURE_BOOKMARKS, tx);
	}

	spa_prop_clear_bootfs(dp->dp_spa, ds->ds_object, tx);

	ASSERT0(dsl_dataset_phys(ds)->ds_next_clones_obj);
	ASSERT0(dsl_dataset_phys(ds)->ds_props_obj);
	ASSERT0(dsl_dataset_phys(ds)->ds_userrefs_obj);
	dsl_dir_rele(ds->ds_dir, ds);
	ds->ds_dir = NULL;
	dmu_object_free_zapified(mos, obj, tx);

	dsl_dir_destroy_sync(ddobj, tx);

	if (rmorigin) {
		dsl_dataset_t *prev;
		VERIFY0(dsl_dataset_hold_obj(dp, prevobj, FTAG, &prev));
		dsl_destroy_snapshot_sync_impl(prev, B_FALSE, tx);
		dsl_dataset_rele(prev, FTAG);
	}
}
static void
dsl_destroy_head_sync(void *arg, dmu_tx_t *tx)
{
	dsl_destroy_head_arg_t *ddha = arg;
	dsl_pool_t *dp = dmu_tx_pool(tx);
	dsl_dataset_t *ds;

	VERIFY0(dsl_dataset_hold(dp, ddha->ddha_name, FTAG, &ds));
	dsl_destroy_head_sync_impl(ds, tx);
	dsl_dataset_rele(ds, FTAG);
}
static void
dsl_destroy_head_begin_sync(void *arg, dmu_tx_t *tx)
{
	dsl_destroy_head_arg_t *ddha = arg;
	dsl_pool_t *dp = dmu_tx_pool(tx);
	dsl_dataset_t *ds;

	VERIFY0(dsl_dataset_hold(dp, ddha->ddha_name, FTAG, &ds));

	/* Mark it as inconsistent on-disk, in case we crash */
	dmu_buf_will_dirty(ds->ds_dbuf, tx);
	dsl_dataset_phys(ds)->ds_flags |= DS_FLAG_INCONSISTENT;

	spa_history_log_internal_ds(ds, "destroy begin", tx, "");
	dsl_dataset_rele(ds, FTAG);
}
int
dsl_destroy_head(const char *name)
{
	dsl_destroy_head_arg_t ddha;
	int error;
	spa_t *spa;
	boolean_t isenabled;

#ifdef _KERNEL
	zfs_destroy_unmount_origin(name);
#endif

	error = spa_open(name, &spa, FTAG);
	if (error != 0)
		return (error);
	isenabled = spa_feature_is_enabled(spa, SPA_FEATURE_ASYNC_DESTROY);
	spa_close(spa, FTAG);

	ddha.ddha_name = name;

	if (!isenabled) {
		objset_t *os;

		error = dsl_sync_task(name, dsl_destroy_head_check,
		    dsl_destroy_head_begin_sync, &ddha,
		    0, ZFS_SPACE_CHECK_NONE);
		if (error != 0)
			return (error);

		/*
		 * Head deletion is processed in one txg on old pools;
		 * remove the objects from open context so that the txg sync
		 * is not too long.
		 */
		error = dmu_objset_own(name, DMU_OST_ANY, B_FALSE, FTAG, &os);
		if (error == 0) {
			uint64_t prev_snap_txg =
			    dsl_dataset_phys(dmu_objset_ds(os))->
			    ds_prev_snap_txg;
			for (uint64_t obj = 0; error == 0;
			    error = dmu_object_next(os, &obj, FALSE,
			    prev_snap_txg))
				(void) dmu_free_long_object(os, obj);
			/* sync out all frees */
			txg_wait_synced(dmu_objset_pool(os), 0);
			dmu_objset_disown(os, FTAG);
		}
	}

	return (dsl_sync_task(name, dsl_destroy_head_check,
	    dsl_destroy_head_sync, &ddha, 0, ZFS_SPACE_CHECK_NONE));
}
/*
 * Note, this function is used as the callback for dmu_objset_find().  We
 * always return 0 so that we will continue to find and process
 * inconsistent datasets, even if we encounter an error trying to
 * process one of them.
 */
/* ARGSUSED */
int
dsl_destroy_inconsistent(const char *dsname, void *arg)
{
	objset_t *os;

	if (dmu_objset_hold(dsname, FTAG, &os) == 0) {
		boolean_t inconsistent = DS_IS_INCONSISTENT(dmu_objset_ds(os));
		dmu_objset_rele(os, FTAG);
		if (inconsistent)
			(void) dsl_destroy_head(dsname);
	}
	return (0);
}