7614 zfs device evacuation/removal
[unleashed.git] / usr / src / uts / common / fs / zfs / vdev_indirect_births.c
blobfbecbe830929fc33a544c276cdc696bbab3527bb
/*
 * CDDL HEADER START
 *
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source.  A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2015 by Delphix. All rights reserved.
 */
20 #include <sys/dmu_tx.h>
21 #include <sys/spa.h>
22 #include <sys/dmu.h>
23 #include <sys/dsl_pool.h>
24 #include <sys/vdev_indirect_births.h>
26 static boolean_t
27 vdev_indirect_births_verify(vdev_indirect_births_t *vib)
29 ASSERT(vib != NULL);
31 ASSERT(vib->vib_object != 0);
32 ASSERT(vib->vib_objset != NULL);
33 ASSERT(vib->vib_phys != NULL);
34 ASSERT(vib->vib_dbuf != NULL);
36 EQUIV(vib->vib_phys->vib_count > 0, vib->vib_entries != NULL);
38 return (B_TRUE);
41 uint64_t
42 vdev_indirect_births_count(vdev_indirect_births_t *vib)
44 ASSERT(vdev_indirect_births_verify(vib));
46 return (vib->vib_phys->vib_count);
49 uint64_t
50 vdev_indirect_births_object(vdev_indirect_births_t *vib)
52 ASSERT(vdev_indirect_births_verify(vib));
54 return (vib->vib_object);
57 static uint64_t
58 vdev_indirect_births_size_impl(vdev_indirect_births_t *vib)
60 return (vib->vib_phys->vib_count * sizeof (*vib->vib_entries));
63 void
64 vdev_indirect_births_close(vdev_indirect_births_t *vib)
66 ASSERT(vdev_indirect_births_verify(vib));
68 if (vib->vib_phys->vib_count > 0) {
69 uint64_t births_size = vdev_indirect_births_size_impl(vib);
71 kmem_free(vib->vib_entries, births_size);
72 vib->vib_entries = NULL;
75 dmu_buf_rele(vib->vib_dbuf, vib);
77 vib->vib_objset = NULL;
78 vib->vib_object = 0;
79 vib->vib_dbuf = NULL;
80 vib->vib_phys = NULL;
82 kmem_free(vib, sizeof (*vib));
85 uint64_t
86 vdev_indirect_births_alloc(objset_t *os, dmu_tx_t *tx)
88 ASSERT(dmu_tx_is_syncing(tx));
90 return (dmu_object_alloc(os,
91 DMU_OTN_UINT64_METADATA, SPA_OLD_MAXBLOCKSIZE,
92 DMU_OTN_UINT64_METADATA, sizeof (vdev_indirect_birth_phys_t),
93 tx));
96 vdev_indirect_births_t *
97 vdev_indirect_births_open(objset_t *os, uint64_t births_object)
99 vdev_indirect_births_t *vib = kmem_zalloc(sizeof (*vib), KM_SLEEP);
101 vib->vib_objset = os;
102 vib->vib_object = births_object;
104 VERIFY0(dmu_bonus_hold(os, vib->vib_object, vib, &vib->vib_dbuf));
105 vib->vib_phys = vib->vib_dbuf->db_data;
107 if (vib->vib_phys->vib_count > 0) {
108 uint64_t births_size = vdev_indirect_births_size_impl(vib);
109 vib->vib_entries = kmem_alloc(births_size, KM_SLEEP);
110 VERIFY0(dmu_read(vib->vib_objset, vib->vib_object, 0,
111 births_size, vib->vib_entries, DMU_READ_PREFETCH));
114 ASSERT(vdev_indirect_births_verify(vib));
116 return (vib);
119 void
120 vdev_indirect_births_free(objset_t *os, uint64_t object, dmu_tx_t *tx)
122 VERIFY0(dmu_object_free(os, object, tx));
125 void
126 vdev_indirect_births_add_entry(vdev_indirect_births_t *vib,
127 uint64_t max_offset, uint64_t txg, dmu_tx_t *tx)
129 vdev_indirect_birth_entry_phys_t vibe;
130 uint64_t old_size;
131 uint64_t new_size;
132 vdev_indirect_birth_entry_phys_t *new_entries;
134 ASSERT(dmu_tx_is_syncing(tx));
135 ASSERT(dsl_pool_sync_context(dmu_tx_pool(tx)));
136 ASSERT(vdev_indirect_births_verify(vib));
138 dmu_buf_will_dirty(vib->vib_dbuf, tx);
140 vibe.vibe_offset = max_offset;
141 vibe.vibe_phys_birth_txg = txg;
143 old_size = vdev_indirect_births_size_impl(vib);
144 dmu_write(vib->vib_objset, vib->vib_object, old_size, sizeof (vibe),
145 &vibe, tx);
146 vib->vib_phys->vib_count++;
147 new_size = vdev_indirect_births_size_impl(vib);
149 new_entries = kmem_alloc(new_size, KM_SLEEP);
150 if (old_size > 0) {
151 bcopy(vib->vib_entries, new_entries, old_size);
152 kmem_free(vib->vib_entries, old_size);
154 new_entries[vib->vib_phys->vib_count - 1] = vibe;
155 vib->vib_entries = new_entries;
158 uint64_t
159 vdev_indirect_births_last_entry_txg(vdev_indirect_births_t *vib)
161 ASSERT(vdev_indirect_births_verify(vib));
162 ASSERT(vib->vib_phys->vib_count > 0);
164 vdev_indirect_birth_entry_phys_t *last =
165 &vib->vib_entries[vib->vib_phys->vib_count - 1];
166 return (last->vibe_phys_birth_txg);
170 * Return the txg in which the given range was copied (i.e. its physical
171 * birth txg). The specified offset+asize must be contiguously mapped
172 * (i.e. not a split block).
174 * The entries are sorted by increasing phys_birth, and also by increasing
175 * offset. We find the specified offset by binary search. Note that we
176 * can not use bsearch() because looking at each entry independently is
177 * insufficient to find the correct entry. Each entry implicitly relies
178 * on the previous entry: an entry indicates that the offsets from the
179 * end of the previous entry to the end of this entry were written in the
180 * specified txg.
182 uint64_t
183 vdev_indirect_births_physbirth(vdev_indirect_births_t *vib, uint64_t offset,
184 uint64_t asize)
186 vdev_indirect_birth_entry_phys_t *base;
187 vdev_indirect_birth_entry_phys_t *last;
189 ASSERT(vdev_indirect_births_verify(vib));
190 ASSERT(vib->vib_phys->vib_count > 0);
192 base = vib->vib_entries;
193 last = base + vib->vib_phys->vib_count - 1;
195 ASSERT3U(offset, <, last->vibe_offset);
197 while (last >= base) {
198 vdev_indirect_birth_entry_phys_t *p =
199 base + ((last - base) / 2);
200 if (offset >= p->vibe_offset) {
201 base = p + 1;
202 } else if (p == vib->vib_entries ||
203 offset >= (p - 1)->vibe_offset) {
204 ASSERT3U(offset + asize, <=, p->vibe_offset);
205 return (p->vibe_phys_birth_txg);
206 } else {
207 last = p - 1;
210 ASSERT(!"offset not found");
211 return (-1);