Merge commit '720b16875295d57e0e6a4e0ec32db4d47412f896'
[unleashed.git] / kernel / fs / zfs / range_tree.c
blob6422fd1c1fa6ecf2a9283fefeabca772f6b0a76a
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
26 * Copyright (c) 2013, 2014 by Delphix. All rights reserved.
29 #include <sys/zfs_context.h>
30 #include <sys/spa.h>
31 #include <sys/dmu.h>
32 #include <sys/dnode.h>
33 #include <sys/zio.h>
34 #include <sys/range_tree.h>
36 kmem_cache_t *range_seg_cache;
38 void
39 range_tree_init(void)
41 ASSERT(range_seg_cache == NULL);
42 range_seg_cache = kmem_cache_create("range_seg_cache",
43 sizeof (range_seg_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
46 void
47 range_tree_fini(void)
49 kmem_cache_destroy(range_seg_cache);
50 range_seg_cache = NULL;
53 void
54 range_tree_stat_verify(range_tree_t *rt)
56 range_seg_t *rs;
57 uint64_t hist[RANGE_TREE_HISTOGRAM_SIZE] = { 0 };
58 int i;
60 for (rs = avl_first(&rt->rt_root); rs != NULL;
61 rs = AVL_NEXT(&rt->rt_root, rs)) {
62 uint64_t size = rs->rs_end - rs->rs_start;
63 int idx = highbit64(size) - 1;
65 hist[idx]++;
66 ASSERT3U(hist[idx], !=, 0);
69 for (i = 0; i < RANGE_TREE_HISTOGRAM_SIZE; i++) {
70 if (hist[i] != rt->rt_histogram[i]) {
71 zfs_dbgmsg("i=%d, hist=%p, hist=%llu, rt_hist=%llu",
72 i, hist, hist[i], rt->rt_histogram[i]);
74 VERIFY3U(hist[i], ==, rt->rt_histogram[i]);
78 static void
79 range_tree_stat_incr(range_tree_t *rt, range_seg_t *rs)
81 uint64_t size = rs->rs_end - rs->rs_start;
82 int idx = highbit64(size) - 1;
84 ASSERT(size != 0);
85 ASSERT3U(idx, <,
86 sizeof (rt->rt_histogram) / sizeof (*rt->rt_histogram));
88 ASSERT(MUTEX_HELD(rt->rt_lock));
89 rt->rt_histogram[idx]++;
90 ASSERT3U(rt->rt_histogram[idx], !=, 0);
93 static void
94 range_tree_stat_decr(range_tree_t *rt, range_seg_t *rs)
96 uint64_t size = rs->rs_end - rs->rs_start;
97 int idx = highbit64(size) - 1;
99 ASSERT(size != 0);
100 ASSERT3U(idx, <,
101 sizeof (rt->rt_histogram) / sizeof (*rt->rt_histogram));
103 ASSERT(MUTEX_HELD(rt->rt_lock));
104 ASSERT3U(rt->rt_histogram[idx], !=, 0);
105 rt->rt_histogram[idx]--;
109 * NOTE: caller is responsible for all locking.
111 static int
112 range_tree_seg_compare(const void *x1, const void *x2)
114 const range_seg_t *r1 = x1;
115 const range_seg_t *r2 = x2;
117 if (r1->rs_start < r2->rs_start) {
118 if (r1->rs_end > r2->rs_start)
119 return (0);
120 return (-1);
122 if (r1->rs_start > r2->rs_start) {
123 if (r1->rs_start < r2->rs_end)
124 return (0);
125 return (1);
127 return (0);
130 range_tree_t *
131 range_tree_create(range_tree_ops_t *ops, void *arg, kmutex_t *lp)
133 range_tree_t *rt;
135 rt = kmem_zalloc(sizeof (range_tree_t), KM_SLEEP);
137 avl_create(&rt->rt_root, range_tree_seg_compare,
138 sizeof (range_seg_t), offsetof(range_seg_t, rs_node));
140 rt->rt_lock = lp;
141 rt->rt_ops = ops;
142 rt->rt_arg = arg;
144 if (rt->rt_ops != NULL)
145 rt->rt_ops->rtop_create(rt, rt->rt_arg);
147 return (rt);
150 void
151 range_tree_destroy(range_tree_t *rt)
153 VERIFY0(rt->rt_space);
155 if (rt->rt_ops != NULL)
156 rt->rt_ops->rtop_destroy(rt, rt->rt_arg);
158 avl_destroy(&rt->rt_root);
159 kmem_free(rt, sizeof (*rt));
162 void
163 range_tree_add(void *arg, uint64_t start, uint64_t size)
165 range_tree_t *rt = arg;
166 avl_index_t where;
167 range_seg_t rsearch, *rs_before, *rs_after, *rs;
168 uint64_t end = start + size;
169 boolean_t merge_before, merge_after;
171 ASSERT(MUTEX_HELD(rt->rt_lock));
172 VERIFY(size != 0);
174 rsearch.rs_start = start;
175 rsearch.rs_end = end;
176 rs = avl_find(&rt->rt_root, &rsearch, &where);
178 if (rs != NULL && rs->rs_start <= start && rs->rs_end >= end) {
179 zfs_panic_recover("zfs: allocating allocated segment"
180 "(offset=%llu size=%llu)\n",
181 (longlong_t)start, (longlong_t)size);
182 return;
185 /* Make sure we don't overlap with either of our neighbors */
186 VERIFY(rs == NULL);
188 rs_before = avl_nearest(&rt->rt_root, where, AVL_BEFORE);
189 rs_after = avl_nearest(&rt->rt_root, where, AVL_AFTER);
191 merge_before = (rs_before != NULL && rs_before->rs_end == start);
192 merge_after = (rs_after != NULL && rs_after->rs_start == end);
194 if (merge_before && merge_after) {
195 avl_remove(&rt->rt_root, rs_before);
196 if (rt->rt_ops != NULL) {
197 rt->rt_ops->rtop_remove(rt, rs_before, rt->rt_arg);
198 rt->rt_ops->rtop_remove(rt, rs_after, rt->rt_arg);
201 range_tree_stat_decr(rt, rs_before);
202 range_tree_stat_decr(rt, rs_after);
204 rs_after->rs_start = rs_before->rs_start;
205 kmem_cache_free(range_seg_cache, rs_before);
206 rs = rs_after;
207 } else if (merge_before) {
208 if (rt->rt_ops != NULL)
209 rt->rt_ops->rtop_remove(rt, rs_before, rt->rt_arg);
211 range_tree_stat_decr(rt, rs_before);
213 rs_before->rs_end = end;
214 rs = rs_before;
215 } else if (merge_after) {
216 if (rt->rt_ops != NULL)
217 rt->rt_ops->rtop_remove(rt, rs_after, rt->rt_arg);
219 range_tree_stat_decr(rt, rs_after);
221 rs_after->rs_start = start;
222 rs = rs_after;
223 } else {
224 rs = kmem_cache_alloc(range_seg_cache, KM_SLEEP);
225 rs->rs_start = start;
226 rs->rs_end = end;
227 avl_insert(&rt->rt_root, rs, where);
230 if (rt->rt_ops != NULL)
231 rt->rt_ops->rtop_add(rt, rs, rt->rt_arg);
233 range_tree_stat_incr(rt, rs);
234 rt->rt_space += size;
237 void
238 range_tree_remove(void *arg, uint64_t start, uint64_t size)
240 range_tree_t *rt = arg;
241 avl_index_t where;
242 range_seg_t rsearch, *rs, *newseg;
243 uint64_t end = start + size;
244 boolean_t left_over, right_over;
246 ASSERT(MUTEX_HELD(rt->rt_lock));
247 VERIFY3U(size, !=, 0);
248 VERIFY3U(size, <=, rt->rt_space);
250 rsearch.rs_start = start;
251 rsearch.rs_end = end;
252 rs = avl_find(&rt->rt_root, &rsearch, &where);
254 /* Make sure we completely overlap with someone */
255 if (rs == NULL) {
256 zfs_panic_recover("zfs: freeing free segment "
257 "(offset=%llu size=%llu)",
258 (longlong_t)start, (longlong_t)size);
259 return;
261 VERIFY3U(rs->rs_start, <=, start);
262 VERIFY3U(rs->rs_end, >=, end);
264 left_over = (rs->rs_start != start);
265 right_over = (rs->rs_end != end);
267 range_tree_stat_decr(rt, rs);
269 if (rt->rt_ops != NULL)
270 rt->rt_ops->rtop_remove(rt, rs, rt->rt_arg);
272 if (left_over && right_over) {
273 newseg = kmem_cache_alloc(range_seg_cache, KM_SLEEP);
274 newseg->rs_start = end;
275 newseg->rs_end = rs->rs_end;
276 range_tree_stat_incr(rt, newseg);
278 rs->rs_end = start;
280 avl_insert_here(&rt->rt_root, newseg, rs, AVL_AFTER);
281 if (rt->rt_ops != NULL)
282 rt->rt_ops->rtop_add(rt, newseg, rt->rt_arg);
283 } else if (left_over) {
284 rs->rs_end = start;
285 } else if (right_over) {
286 rs->rs_start = end;
287 } else {
288 avl_remove(&rt->rt_root, rs);
289 kmem_cache_free(range_seg_cache, rs);
290 rs = NULL;
293 if (rs != NULL) {
294 range_tree_stat_incr(rt, rs);
296 if (rt->rt_ops != NULL)
297 rt->rt_ops->rtop_add(rt, rs, rt->rt_arg);
300 rt->rt_space -= size;
303 static range_seg_t *
304 range_tree_find_impl(range_tree_t *rt, uint64_t start, uint64_t size)
306 avl_index_t where;
307 range_seg_t rsearch;
308 uint64_t end = start + size;
310 ASSERT(MUTEX_HELD(rt->rt_lock));
311 VERIFY(size != 0);
313 rsearch.rs_start = start;
314 rsearch.rs_end = end;
315 return (avl_find(&rt->rt_root, &rsearch, &where));
318 static range_seg_t *
319 range_tree_find(range_tree_t *rt, uint64_t start, uint64_t size)
321 range_seg_t *rs = range_tree_find_impl(rt, start, size);
322 if (rs != NULL && rs->rs_start <= start && rs->rs_end >= start + size)
323 return (rs);
324 return (NULL);
327 void
328 range_tree_verify(range_tree_t *rt, uint64_t off, uint64_t size)
330 range_seg_t *rs;
332 mutex_enter(rt->rt_lock);
333 rs = range_tree_find(rt, off, size);
334 if (rs != NULL)
335 panic("freeing free block; rs=%p", (void *)rs);
336 mutex_exit(rt->rt_lock);
339 boolean_t
340 range_tree_contains(range_tree_t *rt, uint64_t start, uint64_t size)
342 return (range_tree_find(rt, start, size) != NULL);
346 * Ensure that this range is not in the tree, regardless of whether
347 * it is currently in the tree.
349 void
350 range_tree_clear(range_tree_t *rt, uint64_t start, uint64_t size)
352 range_seg_t *rs;
354 while ((rs = range_tree_find_impl(rt, start, size)) != NULL) {
355 uint64_t free_start = MAX(rs->rs_start, start);
356 uint64_t free_end = MIN(rs->rs_end, start + size);
357 range_tree_remove(rt, free_start, free_end - free_start);
361 void
362 range_tree_swap(range_tree_t **rtsrc, range_tree_t **rtdst)
364 range_tree_t *rt;
366 ASSERT(MUTEX_HELD((*rtsrc)->rt_lock));
367 ASSERT0(range_tree_space(*rtdst));
368 ASSERT0(avl_numnodes(&(*rtdst)->rt_root));
370 rt = *rtsrc;
371 *rtsrc = *rtdst;
372 *rtdst = rt;
375 void
376 range_tree_vacate(range_tree_t *rt, range_tree_func_t *func, void *arg)
378 range_seg_t *rs;
379 void *cookie = NULL;
381 ASSERT(MUTEX_HELD(rt->rt_lock));
383 if (rt->rt_ops != NULL)
384 rt->rt_ops->rtop_vacate(rt, rt->rt_arg);
386 while ((rs = avl_destroy_nodes(&rt->rt_root, &cookie)) != NULL) {
387 if (func != NULL)
388 func(arg, rs->rs_start, rs->rs_end - rs->rs_start);
389 kmem_cache_free(range_seg_cache, rs);
392 bzero(rt->rt_histogram, sizeof (rt->rt_histogram));
393 rt->rt_space = 0;
396 void
397 range_tree_walk(range_tree_t *rt, range_tree_func_t *func, void *arg)
399 range_seg_t *rs;
401 ASSERT(MUTEX_HELD(rt->rt_lock));
403 for (rs = avl_first(&rt->rt_root); rs; rs = AVL_NEXT(&rt->rt_root, rs))
404 func(arg, rs->rs_start, rs->rs_end - rs->rs_start);
407 uint64_t
408 range_tree_space(range_tree_t *rt)
410 return (rt->rt_space);