4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
26 * Copyright (c) 2013, 2015 by Delphix. All rights reserved.
29 #include <sys/zfs_context.h>
32 #include <sys/dnode.h>
34 #include <sys/range_tree.h>
/* Slab cache for range_seg_t nodes; shared by every range tree. */
kmem_cache_t *range_seg_cache;
41 ASSERT(range_seg_cache
== NULL
);
42 range_seg_cache
= kmem_cache_create("range_seg_cache",
43 sizeof (range_seg_t
), 0, NULL
, NULL
, NULL
, NULL
, NULL
, 0);
49 kmem_cache_destroy(range_seg_cache
);
50 range_seg_cache
= NULL
;
54 range_tree_stat_verify(range_tree_t
*rt
)
57 uint64_t hist
[RANGE_TREE_HISTOGRAM_SIZE
] = { 0 };
60 for (rs
= avl_first(&rt
->rt_root
); rs
!= NULL
;
61 rs
= AVL_NEXT(&rt
->rt_root
, rs
)) {
62 uint64_t size
= rs
->rs_end
- rs
->rs_start
;
63 int idx
= highbit64(size
) - 1;
66 ASSERT3U(hist
[idx
], !=, 0);
69 for (i
= 0; i
< RANGE_TREE_HISTOGRAM_SIZE
; i
++) {
70 if (hist
[i
] != rt
->rt_histogram
[i
]) {
71 zfs_dbgmsg("i=%d, hist=%p, hist=%llu, rt_hist=%llu",
72 i
, hist
, hist
[i
], rt
->rt_histogram
[i
]);
74 VERIFY3U(hist
[i
], ==, rt
->rt_histogram
[i
]);
79 range_tree_stat_incr(range_tree_t
*rt
, range_seg_t
*rs
)
81 uint64_t size
= rs
->rs_end
- rs
->rs_start
;
82 int idx
= highbit64(size
) - 1;
86 sizeof (rt
->rt_histogram
) / sizeof (*rt
->rt_histogram
));
88 rt
->rt_histogram
[idx
]++;
89 ASSERT3U(rt
->rt_histogram
[idx
], !=, 0);
93 range_tree_stat_decr(range_tree_t
*rt
, range_seg_t
*rs
)
95 uint64_t size
= rs
->rs_end
- rs
->rs_start
;
96 int idx
= highbit64(size
) - 1;
100 sizeof (rt
->rt_histogram
) / sizeof (*rt
->rt_histogram
));
102 ASSERT3U(rt
->rt_histogram
[idx
], !=, 0);
103 rt
->rt_histogram
[idx
]--;
107 * NOTE: caller is responsible for all locking.
110 range_tree_seg_compare(const void *x1
, const void *x2
)
112 const range_seg_t
*r1
= x1
;
113 const range_seg_t
*r2
= x2
;
115 if (r1
->rs_start
< r2
->rs_start
) {
116 if (r1
->rs_end
> r2
->rs_start
)
120 if (r1
->rs_start
> r2
->rs_start
) {
121 if (r1
->rs_start
< r2
->rs_end
)
129 range_tree_create(range_tree_ops_t
*ops
, void *arg
)
133 rt
= kmem_zalloc(sizeof (range_tree_t
), KM_SLEEP
);
135 avl_create(&rt
->rt_root
, range_tree_seg_compare
,
136 sizeof (range_seg_t
), offsetof(range_seg_t
, rs_node
));
141 if (rt
->rt_ops
!= NULL
)
142 rt
->rt_ops
->rtop_create(rt
, rt
->rt_arg
);
148 range_tree_destroy(range_tree_t
*rt
)
150 VERIFY0(rt
->rt_space
);
152 if (rt
->rt_ops
!= NULL
)
153 rt
->rt_ops
->rtop_destroy(rt
, rt
->rt_arg
);
155 avl_destroy(&rt
->rt_root
);
156 kmem_free(rt
, sizeof (*rt
));
160 range_tree_add(void *arg
, uint64_t start
, uint64_t size
)
162 range_tree_t
*rt
= arg
;
164 range_seg_t rsearch
, *rs_before
, *rs_after
, *rs
;
165 uint64_t end
= start
+ size
;
166 boolean_t merge_before
, merge_after
;
170 rsearch
.rs_start
= start
;
171 rsearch
.rs_end
= end
;
172 rs
= avl_find(&rt
->rt_root
, &rsearch
, &where
);
174 if (rs
!= NULL
&& rs
->rs_start
<= start
&& rs
->rs_end
>= end
) {
175 zfs_panic_recover("zfs: allocating allocated segment"
176 "(offset=%llu size=%llu)\n",
177 (longlong_t
)start
, (longlong_t
)size
);
181 /* Make sure we don't overlap with either of our neighbors */
184 rs_before
= avl_nearest(&rt
->rt_root
, where
, AVL_BEFORE
);
185 rs_after
= avl_nearest(&rt
->rt_root
, where
, AVL_AFTER
);
187 merge_before
= (rs_before
!= NULL
&& rs_before
->rs_end
== start
);
188 merge_after
= (rs_after
!= NULL
&& rs_after
->rs_start
== end
);
190 if (merge_before
&& merge_after
) {
191 avl_remove(&rt
->rt_root
, rs_before
);
192 if (rt
->rt_ops
!= NULL
) {
193 rt
->rt_ops
->rtop_remove(rt
, rs_before
, rt
->rt_arg
);
194 rt
->rt_ops
->rtop_remove(rt
, rs_after
, rt
->rt_arg
);
197 range_tree_stat_decr(rt
, rs_before
);
198 range_tree_stat_decr(rt
, rs_after
);
200 rs_after
->rs_start
= rs_before
->rs_start
;
201 kmem_cache_free(range_seg_cache
, rs_before
);
203 } else if (merge_before
) {
204 if (rt
->rt_ops
!= NULL
)
205 rt
->rt_ops
->rtop_remove(rt
, rs_before
, rt
->rt_arg
);
207 range_tree_stat_decr(rt
, rs_before
);
209 rs_before
->rs_end
= end
;
211 } else if (merge_after
) {
212 if (rt
->rt_ops
!= NULL
)
213 rt
->rt_ops
->rtop_remove(rt
, rs_after
, rt
->rt_arg
);
215 range_tree_stat_decr(rt
, rs_after
);
217 rs_after
->rs_start
= start
;
220 rs
= kmem_cache_alloc(range_seg_cache
, KM_SLEEP
);
221 rs
->rs_start
= start
;
223 avl_insert(&rt
->rt_root
, rs
, where
);
226 if (rt
->rt_ops
!= NULL
)
227 rt
->rt_ops
->rtop_add(rt
, rs
, rt
->rt_arg
);
229 range_tree_stat_incr(rt
, rs
);
230 rt
->rt_space
+= size
;
234 range_tree_remove(void *arg
, uint64_t start
, uint64_t size
)
236 range_tree_t
*rt
= arg
;
238 range_seg_t rsearch
, *rs
, *newseg
;
239 uint64_t end
= start
+ size
;
240 boolean_t left_over
, right_over
;
242 VERIFY3U(size
, !=, 0);
243 VERIFY3U(size
, <=, rt
->rt_space
);
245 rsearch
.rs_start
= start
;
246 rsearch
.rs_end
= end
;
247 rs
= avl_find(&rt
->rt_root
, &rsearch
, &where
);
249 /* Make sure we completely overlap with someone */
251 zfs_panic_recover("zfs: freeing free segment "
252 "(offset=%llu size=%llu)",
253 (longlong_t
)start
, (longlong_t
)size
);
256 VERIFY3U(rs
->rs_start
, <=, start
);
257 VERIFY3U(rs
->rs_end
, >=, end
);
259 left_over
= (rs
->rs_start
!= start
);
260 right_over
= (rs
->rs_end
!= end
);
262 range_tree_stat_decr(rt
, rs
);
264 if (rt
->rt_ops
!= NULL
)
265 rt
->rt_ops
->rtop_remove(rt
, rs
, rt
->rt_arg
);
267 if (left_over
&& right_over
) {
268 newseg
= kmem_cache_alloc(range_seg_cache
, KM_SLEEP
);
269 newseg
->rs_start
= end
;
270 newseg
->rs_end
= rs
->rs_end
;
271 range_tree_stat_incr(rt
, newseg
);
275 avl_insert_here(&rt
->rt_root
, newseg
, rs
, AVL_AFTER
);
276 if (rt
->rt_ops
!= NULL
)
277 rt
->rt_ops
->rtop_add(rt
, newseg
, rt
->rt_arg
);
278 } else if (left_over
) {
280 } else if (right_over
) {
283 avl_remove(&rt
->rt_root
, rs
);
284 kmem_cache_free(range_seg_cache
, rs
);
289 range_tree_stat_incr(rt
, rs
);
291 if (rt
->rt_ops
!= NULL
)
292 rt
->rt_ops
->rtop_add(rt
, rs
, rt
->rt_arg
);
295 rt
->rt_space
-= size
;
299 range_tree_find_impl(range_tree_t
*rt
, uint64_t start
, uint64_t size
)
303 uint64_t end
= start
+ size
;
307 rsearch
.rs_start
= start
;
308 rsearch
.rs_end
= end
;
309 return (avl_find(&rt
->rt_root
, &rsearch
, &where
));
313 range_tree_find(range_tree_t
*rt
, uint64_t start
, uint64_t size
)
315 range_seg_t
*rs
= range_tree_find_impl(rt
, start
, size
);
316 if (rs
!= NULL
&& rs
->rs_start
<= start
&& rs
->rs_end
>= start
+ size
)
322 range_tree_verify(range_tree_t
*rt
, uint64_t off
, uint64_t size
)
326 rs
= range_tree_find(rt
, off
, size
);
328 panic("freeing free block; rs=%p", (void *)rs
);
332 range_tree_contains(range_tree_t
*rt
, uint64_t start
, uint64_t size
)
334 return (range_tree_find(rt
, start
, size
) != NULL
);
338 * Ensure that this range is not in the tree, regardless of whether
339 * it is currently in the tree.
342 range_tree_clear(range_tree_t
*rt
, uint64_t start
, uint64_t size
)
349 while ((rs
= range_tree_find_impl(rt
, start
, size
)) != NULL
) {
350 uint64_t free_start
= MAX(rs
->rs_start
, start
);
351 uint64_t free_end
= MIN(rs
->rs_end
, start
+ size
);
352 range_tree_remove(rt
, free_start
, free_end
- free_start
);
357 range_tree_swap(range_tree_t
**rtsrc
, range_tree_t
**rtdst
)
361 ASSERT0(range_tree_space(*rtdst
));
362 ASSERT0(avl_numnodes(&(*rtdst
)->rt_root
));
370 range_tree_vacate(range_tree_t
*rt
, range_tree_func_t
*func
, void *arg
)
376 if (rt
->rt_ops
!= NULL
)
377 rt
->rt_ops
->rtop_vacate(rt
, rt
->rt_arg
);
379 while ((rs
= avl_destroy_nodes(&rt
->rt_root
, &cookie
)) != NULL
) {
381 func(arg
, rs
->rs_start
, rs
->rs_end
- rs
->rs_start
);
382 kmem_cache_free(range_seg_cache
, rs
);
385 bzero(rt
->rt_histogram
, sizeof (rt
->rt_histogram
));
390 range_tree_walk(range_tree_t
*rt
, range_tree_func_t
*func
, void *arg
)
395 for (rs
= avl_first(&rt
->rt_root
); rs
; rs
= AVL_NEXT(&rt
->rt_root
, rs
))
396 func(arg
, rs
->rs_start
, rs
->rs_end
- rs
->rs_start
);
400 range_tree_space(range_tree_t
*rt
)
402 return (rt
->rt_space
);