/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Copyright (c) 2013, 2014 by Delphix. All rights reserved.
 */
#include <sys/zfs_context.h>
#include <sys/dnode.h>
#include <sys/range_tree.h>
kmem_cache_t *range_seg_cache;
void
range_tree_init(void)
{
	ASSERT(range_seg_cache == NULL);
	range_seg_cache = kmem_cache_create("range_seg_cache",
	    sizeof (range_seg_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
}
void
range_tree_fini(void)
{
	kmem_cache_destroy(range_seg_cache);
	range_seg_cache = NULL;
}
/*
 * Recompute the size histogram from the segments in the tree and verify
 * that it matches the cached rt_histogram counters.
 */
void
range_tree_stat_verify(range_tree_t *rt)
{
	range_seg_t *rs;
	uint64_t hist[RANGE_TREE_HISTOGRAM_SIZE] = { 0 };
	int i;

	for (rs = avl_first(&rt->rt_root); rs != NULL;
	    rs = AVL_NEXT(&rt->rt_root, rs)) {
		uint64_t size = rs->rs_end - rs->rs_start;
		int idx = highbit64(size) - 1;

		hist[idx]++;
		ASSERT3U(hist[idx], !=, 0);
	}

	for (i = 0; i < RANGE_TREE_HISTOGRAM_SIZE; i++) {
		if (hist[i] != rt->rt_histogram[i]) {
			zfs_dbgmsg("i=%d, hist=%p, hist=%llu, rt_hist=%llu",
			    i, hist, hist[i], rt->rt_histogram[i]);
		}
		VERIFY3U(hist[i], ==, rt->rt_histogram[i]);
	}
}
static void
range_tree_stat_incr(range_tree_t *rt, range_seg_t *rs)
{
	uint64_t size = rs->rs_end - rs->rs_start;
	int idx = highbit64(size) - 1;

	ASSERT(size != 0);
	ASSERT3U(idx, <,
	    sizeof (rt->rt_histogram) / sizeof (*rt->rt_histogram));

	ASSERT(MUTEX_HELD(rt->rt_lock));
	rt->rt_histogram[idx]++;
	ASSERT3U(rt->rt_histogram[idx], !=, 0);
}
static void
range_tree_stat_decr(range_tree_t *rt, range_seg_t *rs)
{
	uint64_t size = rs->rs_end - rs->rs_start;
	int idx = highbit64(size) - 1;

	ASSERT(size != 0);
	ASSERT3U(idx, <,
	    sizeof (rt->rt_histogram) / sizeof (*rt->rt_histogram));

	ASSERT(MUTEX_HELD(rt->rt_lock));
	ASSERT3U(rt->rt_histogram[idx], !=, 0);
	rt->rt_histogram[idx]--;
}
/*
 * NOTE: caller is responsible for all locking.
 */
/* Overlapping segments compare equal, so avl_find() returns any overlap. */
static int
range_tree_seg_compare(const void *x1, const void *x2)
{
	const range_seg_t *r1 = x1;
	const range_seg_t *r2 = x2;

	if (r1->rs_start < r2->rs_start) {
		if (r1->rs_end > r2->rs_start)
			return (0);
		return (-1);
	}
	if (r1->rs_start > r2->rs_start) {
		if (r1->rs_start < r2->rs_end)
			return (0);
		return (1);
	}
	return (0);
}
range_tree_t *
range_tree_create(range_tree_ops_t *ops, void *arg, kmutex_t *lp)
{
	range_tree_t *rt;

	rt = kmem_zalloc(sizeof (range_tree_t), KM_SLEEP);

	avl_create(&rt->rt_root, range_tree_seg_compare,
	    sizeof (range_seg_t), offsetof(range_seg_t, rs_node));

	rt->rt_lock = lp;
	rt->rt_ops = ops;
	rt->rt_arg = arg;

	if (rt->rt_ops != NULL)
		rt->rt_ops->rtop_create(rt, rt->rt_arg);

	return (rt);
}
void
range_tree_destroy(range_tree_t *rt)
{
	VERIFY0(rt->rt_space);

	if (rt->rt_ops != NULL)
		rt->rt_ops->rtop_destroy(rt, rt->rt_arg);

	avl_destroy(&rt->rt_root);
	kmem_free(rt, sizeof (*rt));
}
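/*
 * A minimal usage sketch of the interfaces in this file, assuming the
 * caller supplies and holds the lock as required by the locking note
 * above.  The identifiers ex_lock, ex_tree and the RANGE_TREE_EXAMPLE
 * guard are hypothetical and only illustrate how a consumer might
 * drive the API; they are not part of the range tree implementation.
 */
#ifdef RANGE_TREE_EXAMPLE
static void
range_tree_usage_example(void)
{
	kmutex_t ex_lock;
	range_tree_t *ex_tree;

	mutex_init(&ex_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_enter(&ex_lock);

	/* No ops vector; the tree only tracks segments and space. */
	ex_tree = range_tree_create(NULL, NULL, &ex_lock);

	/* Track the byte range [0x1000, 0x3000). */
	range_tree_add(ex_tree, 0x1000, 0x2000);
	ASSERT(range_tree_contains(ex_tree, 0x1000, 0x2000));

	/* Carve 0x400 bytes out of the middle; the segment is split. */
	range_tree_remove(ex_tree, 0x1800, 0x400);
	ASSERT3U(range_tree_space(ex_tree), ==, 0x1c00);

	/* Empty the tree (no per-segment callback) and tear it down. */
	range_tree_vacate(ex_tree, NULL, NULL);
	range_tree_destroy(ex_tree);

	mutex_exit(&ex_lock);
	mutex_destroy(&ex_lock);
}
#endif	/* RANGE_TREE_EXAMPLE */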
void
range_tree_add(void *arg, uint64_t start, uint64_t size)
{
	range_tree_t *rt = arg;
	avl_index_t where;
	range_seg_t rsearch, *rs_before, *rs_after, *rs;
	uint64_t end = start + size;
	boolean_t merge_before, merge_after;

	ASSERT(MUTEX_HELD(rt->rt_lock));
	VERIFY(size != 0);

	rsearch.rs_start = start;
	rsearch.rs_end = end;
	rs = avl_find(&rt->rt_root, &rsearch, &where);

	if (rs != NULL && rs->rs_start <= start && rs->rs_end >= end) {
		zfs_panic_recover("zfs: allocating allocated segment"
		    "(offset=%llu size=%llu)\n",
		    (longlong_t)start, (longlong_t)size);
		return;
	}

	/* Make sure we don't overlap with either of our neighbors */
	VERIFY(rs == NULL);

	rs_before = avl_nearest(&rt->rt_root, where, AVL_BEFORE);
	rs_after = avl_nearest(&rt->rt_root, where, AVL_AFTER);

	merge_before = (rs_before != NULL && rs_before->rs_end == start);
	merge_after = (rs_after != NULL && rs_after->rs_start == end);

	if (merge_before && merge_after) {
		avl_remove(&rt->rt_root, rs_before);
		if (rt->rt_ops != NULL) {
			rt->rt_ops->rtop_remove(rt, rs_before, rt->rt_arg);
			rt->rt_ops->rtop_remove(rt, rs_after, rt->rt_arg);
		}

		range_tree_stat_decr(rt, rs_before);
		range_tree_stat_decr(rt, rs_after);

		rs_after->rs_start = rs_before->rs_start;
		kmem_cache_free(range_seg_cache, rs_before);
		rs = rs_after;
	} else if (merge_before) {
		if (rt->rt_ops != NULL)
			rt->rt_ops->rtop_remove(rt, rs_before, rt->rt_arg);

		range_tree_stat_decr(rt, rs_before);

		rs_before->rs_end = end;
		rs = rs_before;
	} else if (merge_after) {
		if (rt->rt_ops != NULL)
			rt->rt_ops->rtop_remove(rt, rs_after, rt->rt_arg);

		range_tree_stat_decr(rt, rs_after);

		rs_after->rs_start = start;
		rs = rs_after;
	} else {
		rs = kmem_cache_alloc(range_seg_cache, KM_SLEEP);
		rs->rs_start = start;
		rs->rs_end = end;
		avl_insert(&rt->rt_root, rs, where);
	}

	if (rt->rt_ops != NULL)
		rt->rt_ops->rtop_add(rt, rs, rt->rt_arg);

	range_tree_stat_incr(rt, rs);
	rt->rt_space += size;
}
void
range_tree_remove(void *arg, uint64_t start, uint64_t size)
{
	range_tree_t *rt = arg;
	avl_index_t where;
	range_seg_t rsearch, *rs, *newseg;
	uint64_t end = start + size;
	boolean_t left_over, right_over;

	ASSERT(MUTEX_HELD(rt->rt_lock));
	VERIFY3U(size, !=, 0);
	VERIFY3U(size, <=, rt->rt_space);

	rsearch.rs_start = start;
	rsearch.rs_end = end;
	rs = avl_find(&rt->rt_root, &rsearch, &where);

	/* Make sure we completely overlap with someone */
	if (rs == NULL) {
		zfs_panic_recover("zfs: freeing free segment "
		    "(offset=%llu size=%llu)",
		    (longlong_t)start, (longlong_t)size);
		return;
	}
	VERIFY3U(rs->rs_start, <=, start);
	VERIFY3U(rs->rs_end, >=, end);

	left_over = (rs->rs_start != start);
	right_over = (rs->rs_end != end);

	range_tree_stat_decr(rt, rs);

	if (rt->rt_ops != NULL)
		rt->rt_ops->rtop_remove(rt, rs, rt->rt_arg);

	if (left_over && right_over) {
		newseg = kmem_cache_alloc(range_seg_cache, KM_SLEEP);
		newseg->rs_start = end;
		newseg->rs_end = rs->rs_end;
		range_tree_stat_incr(rt, newseg);

		rs->rs_end = start;

		avl_insert_here(&rt->rt_root, newseg, rs, AVL_AFTER);
		if (rt->rt_ops != NULL)
			rt->rt_ops->rtop_add(rt, newseg, rt->rt_arg);
	} else if (left_over) {
		rs->rs_end = start;
	} else if (right_over) {
		rs->rs_start = end;
	} else {
		avl_remove(&rt->rt_root, rs);
		kmem_cache_free(range_seg_cache, rs);
		rs = NULL;
	}

	if (rs != NULL) {
		range_tree_stat_incr(rt, rs);

		if (rt->rt_ops != NULL)
			rt->rt_ops->rtop_add(rt, rs, rt->rt_arg);
	}

	rt->rt_space -= size;
}
static range_seg_t *
range_tree_find_impl(range_tree_t *rt, uint64_t start, uint64_t size)
{
	avl_index_t where;
	range_seg_t rsearch;
	uint64_t end = start + size;

	ASSERT(MUTEX_HELD(rt->rt_lock));
	VERIFY(size != 0);

	rsearch.rs_start = start;
	rsearch.rs_end = end;
	return (avl_find(&rt->rt_root, &rsearch, &where));
}
static range_seg_t *
range_tree_find(range_tree_t *rt, uint64_t start, uint64_t size)
{
	range_seg_t *rs = range_tree_find_impl(rt, start, size);

	if (rs != NULL && rs->rs_start <= start && rs->rs_end >= start + size)
		return (rs);
	return (NULL);
}
void
range_tree_verify(range_tree_t *rt, uint64_t off, uint64_t size)
{
	range_seg_t *rs;

	mutex_enter(rt->rt_lock);
	rs = range_tree_find(rt, off, size);
	if (rs != NULL)
		panic("freeing free block; rs=%p", (void *)rs);
	mutex_exit(rt->rt_lock);
}
boolean_t
range_tree_contains(range_tree_t *rt, uint64_t start, uint64_t size)
{
	return (range_tree_find(rt, start, size) != NULL);
}
/*
 * Ensure that this range is not in the tree, regardless of whether
 * it is currently in the tree.
 */
void
range_tree_clear(range_tree_t *rt, uint64_t start, uint64_t size)
{
	range_seg_t *rs;

	while ((rs = range_tree_find_impl(rt, start, size)) != NULL) {
		uint64_t free_start = MAX(rs->rs_start, start);
		uint64_t free_end = MIN(rs->rs_end, start + size);

		range_tree_remove(rt, free_start, free_end - free_start);
	}
}
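/*
 * A hedged illustration of the semantics above (hypothetical offsets,
 * with rt_lock already held by the caller): unlike range_tree_remove(),
 * which panics when asked to free space it does not track,
 * range_tree_clear() trims only the portions of the requested range
 * that are actually present.  This helper is not part of the original
 * implementation.
 */
#ifdef RANGE_TREE_EXAMPLE
static void
range_tree_clear_example(range_tree_t *rt)
{
	ASSERT(MUTEX_HELD(rt->rt_lock));
	ASSERT0(range_tree_space(rt));

	/* Track only [0x2000, 0x3000). */
	range_tree_add(rt, 0x2000, 0x1000);

	/*
	 * Clearing the wider range [0x1000, 0x4000) removes just that
	 * segment, leaving the tree empty.
	 */
	range_tree_clear(rt, 0x1000, 0x3000);
	ASSERT0(range_tree_space(rt));
}
#endif	/* RANGE_TREE_EXAMPLE */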
void
range_tree_swap(range_tree_t **rtsrc, range_tree_t **rtdst)
{
	range_tree_t *rt;

	ASSERT(MUTEX_HELD((*rtsrc)->rt_lock));
	ASSERT0(range_tree_space(*rtdst));
	ASSERT0(avl_numnodes(&(*rtdst)->rt_root));

	rt = *rtsrc;
	*rtsrc = *rtdst;
	*rtdst = rt;
}
void
range_tree_vacate(range_tree_t *rt, range_tree_func_t *func, void *arg)
{
	range_seg_t *rs;
	void *cookie = NULL;

	ASSERT(MUTEX_HELD(rt->rt_lock));

	if (rt->rt_ops != NULL)
		rt->rt_ops->rtop_vacate(rt, rt->rt_arg);

	while ((rs = avl_destroy_nodes(&rt->rt_root, &cookie)) != NULL) {
		if (func != NULL)
			func(arg, rs->rs_start, rs->rs_end - rs->rs_start);
		kmem_cache_free(range_seg_cache, rs);
	}

	bzero(rt->rt_histogram, sizeof (rt->rt_histogram));
	rt->rt_space = 0;
}
void
range_tree_walk(range_tree_t *rt, range_tree_func_t *func, void *arg)
{
	range_seg_t *rs;

	ASSERT(MUTEX_HELD(rt->rt_lock));

	for (rs = avl_first(&rt->rt_root); rs; rs = AVL_NEXT(&rt->rt_root, rs))
		func(arg, rs->rs_start, rs->rs_end - rs->rs_start);
}
uint64_t
range_tree_space(range_tree_t *rt)
{
	return (rt->rt_space);
}