8416 abd.h is not C++ friendly
[unleashed.git] / usr / src / uts / common / fs / zfs / multilist.c
blob36b9feba84d39c5b61b2622de2d1dca31fb412bf
1 /*
2 * CDDL HEADER START
4 * This file and its contents are supplied under the terms of the
5 * Common Development and Distribution License ("CDDL"), version 1.0.
6 * You may only use this file in accordance with the terms of version
7 * 1.0 of the CDDL.
9 * A full copy of the text of the CDDL should have accompanied this
10 * source. A copy of the CDDL is also available via the Internet at
11 * http://www.illumos.org/license/CDDL.
13 * CDDL HEADER END
16 * Copyright (c) 2013, 2017 by Delphix. All rights reserved.
19 #include <sys/zfs_context.h>
20 #include <sys/multilist.h>
22 /* needed for spa_get_random() */
23 #include <sys/spa.h>
26 * This overrides the number of sublists in each multilist_t, which defaults
27 * to the number of CPUs in the system (see multilist_create()).
29 int zfs_multilist_num_sublists = 0;
32 * Given the object contained on the list, return a pointer to the
33 * object's multilist_node_t structure it contains.
35 static multilist_node_t *
36 multilist_d2l(multilist_t *ml, void *obj)
38 return ((multilist_node_t *)((char *)obj + ml->ml_offset));
42 * Initialize a new mutlilist using the parameters specified.
44 * - 'size' denotes the size of the structure containing the
45 * multilist_node_t.
46 * - 'offset' denotes the byte offset of the mutlilist_node_t within
47 * the structure that contains it.
48 * - 'num' specifies the number of internal sublists to create.
49 * - 'index_func' is used to determine which sublist to insert into
50 * when the multilist_insert() function is called; as well as which
51 * sublist to remove from when multilist_remove() is called. The
52 * requirements this function must meet, are the following:
54 * - It must always return the same value when called on the same
55 * object (to ensure the object is removed from the list it was
56 * inserted into).
58 * - It must return a value in the range [0, number of sublists).
59 * The multilist_get_num_sublists() function may be used to
60 * determine the number of sublists in the multilist.
62 * Also, in order to reduce internal contention between the sublists
63 * during insertion and removal, this function should choose evenly
64 * between all available sublists when inserting. This isn't a hard
65 * requirement, but a general rule of thumb in order to garner the
66 * best multi-threaded performance out of the data structure.
68 static multilist_t *
69 multilist_create_impl(size_t size, size_t offset,
70 unsigned int num, multilist_sublist_index_func_t *index_func)
72 ASSERT3U(size, >, 0);
73 ASSERT3U(size, >=, offset + sizeof (multilist_node_t));
74 ASSERT3U(num, >, 0);
75 ASSERT3P(index_func, !=, NULL);
77 multilist_t *ml = kmem_alloc(sizeof (*ml), KM_SLEEP);
78 ml->ml_offset = offset;
79 ml->ml_num_sublists = num;
80 ml->ml_index_func = index_func;
82 ml->ml_sublists = kmem_zalloc(sizeof (multilist_sublist_t) *
83 ml->ml_num_sublists, KM_SLEEP);
85 ASSERT3P(ml->ml_sublists, !=, NULL);
87 for (int i = 0; i < ml->ml_num_sublists; i++) {
88 multilist_sublist_t *mls = &ml->ml_sublists[i];
89 mutex_init(&mls->mls_lock, NULL, MUTEX_DEFAULT, NULL);
90 list_create(&mls->mls_list, size, offset);
92 return (ml);
96 * Allocate a new multilist, using the default number of sublists
97 * (the number of CPUs, or at least 4, or the tunable
98 * zfs_multilist_num_sublists).
100 multilist_t *
101 multilist_create(size_t size, size_t offset,
102 multilist_sublist_index_func_t *index_func)
104 int num_sublists;
106 if (zfs_multilist_num_sublists > 0) {
107 num_sublists = zfs_multilist_num_sublists;
108 } else {
109 num_sublists = MAX(boot_ncpus, 4);
112 return (multilist_create_impl(size, offset, num_sublists, index_func));
116 * Destroy the given multilist object, and free up any memory it holds.
118 void
119 multilist_destroy(multilist_t *ml)
121 ASSERT(multilist_is_empty(ml));
123 for (int i = 0; i < ml->ml_num_sublists; i++) {
124 multilist_sublist_t *mls = &ml->ml_sublists[i];
126 ASSERT(list_is_empty(&mls->mls_list));
128 list_destroy(&mls->mls_list);
129 mutex_destroy(&mls->mls_lock);
132 ASSERT3P(ml->ml_sublists, !=, NULL);
133 kmem_free(ml->ml_sublists,
134 sizeof (multilist_sublist_t) * ml->ml_num_sublists);
136 ml->ml_num_sublists = 0;
137 ml->ml_offset = 0;
138 kmem_free(ml, sizeof (multilist_t));
142 * Insert the given object into the multilist.
144 * This function will insert the object specified into the sublist
145 * determined using the function given at multilist creation time.
147 * The sublist locks are automatically acquired if not already held, to
148 * ensure consistency when inserting and removing from multiple threads.
150 void
151 multilist_insert(multilist_t *ml, void *obj)
153 unsigned int sublist_idx = ml->ml_index_func(ml, obj);
154 multilist_sublist_t *mls;
155 boolean_t need_lock;
157 DTRACE_PROBE3(multilist__insert, multilist_t *, ml,
158 unsigned int, sublist_idx, void *, obj);
160 ASSERT3U(sublist_idx, <, ml->ml_num_sublists);
162 mls = &ml->ml_sublists[sublist_idx];
165 * Note: Callers may already hold the sublist lock by calling
166 * multilist_sublist_lock(). Here we rely on MUTEX_HELD()
167 * returning TRUE if and only if the current thread holds the
168 * lock. While it's a little ugly to make the lock recursive in
169 * this way, it works and allows the calling code to be much
170 * simpler -- otherwise it would have to pass around a flag
171 * indicating that it already has the lock.
173 need_lock = !MUTEX_HELD(&mls->mls_lock);
175 if (need_lock)
176 mutex_enter(&mls->mls_lock);
178 ASSERT(!multilist_link_active(multilist_d2l(ml, obj)));
180 multilist_sublist_insert_head(mls, obj);
182 if (need_lock)
183 mutex_exit(&mls->mls_lock);
187 * Remove the given object from the multilist.
189 * This function will remove the object specified from the sublist
190 * determined using the function given at multilist creation time.
192 * The necessary sublist locks are automatically acquired, to ensure
193 * consistency when inserting and removing from multiple threads.
195 void
196 multilist_remove(multilist_t *ml, void *obj)
198 unsigned int sublist_idx = ml->ml_index_func(ml, obj);
199 multilist_sublist_t *mls;
200 boolean_t need_lock;
202 DTRACE_PROBE3(multilist__remove, multilist_t *, ml,
203 unsigned int, sublist_idx, void *, obj);
205 ASSERT3U(sublist_idx, <, ml->ml_num_sublists);
207 mls = &ml->ml_sublists[sublist_idx];
208 /* See comment in multilist_insert(). */
209 need_lock = !MUTEX_HELD(&mls->mls_lock);
211 if (need_lock)
212 mutex_enter(&mls->mls_lock);
214 ASSERT(multilist_link_active(multilist_d2l(ml, obj)));
216 multilist_sublist_remove(mls, obj);
218 if (need_lock)
219 mutex_exit(&mls->mls_lock);
223 * Check to see if this multilist object is empty.
225 * This will return TRUE if it finds all of the sublists of this
226 * multilist to be empty, and FALSE otherwise. Each sublist lock will be
227 * automatically acquired as necessary.
229 * If concurrent insertions and removals are occurring, the semantics
230 * of this function become a little fuzzy. Instead of locking all
231 * sublists for the entire call time of the function, each sublist is
232 * only locked as it is individually checked for emptiness. Thus, it's
233 * possible for this function to return TRUE with non-empty sublists at
234 * the time the function returns. This would be due to another thread
235 * inserting into a given sublist, after that specific sublist was check
236 * and deemed empty, but before all sublists have been checked.
239 multilist_is_empty(multilist_t *ml)
241 for (int i = 0; i < ml->ml_num_sublists; i++) {
242 multilist_sublist_t *mls = &ml->ml_sublists[i];
243 /* See comment in multilist_insert(). */
244 boolean_t need_lock = !MUTEX_HELD(&mls->mls_lock);
246 if (need_lock)
247 mutex_enter(&mls->mls_lock);
249 if (!list_is_empty(&mls->mls_list)) {
250 if (need_lock)
251 mutex_exit(&mls->mls_lock);
253 return (FALSE);
256 if (need_lock)
257 mutex_exit(&mls->mls_lock);
260 return (TRUE);
263 /* Return the number of sublists composing this multilist */
264 unsigned int
265 multilist_get_num_sublists(multilist_t *ml)
267 return (ml->ml_num_sublists);
270 /* Return a randomly selected, valid sublist index for this multilist */
271 unsigned int
272 multilist_get_random_index(multilist_t *ml)
274 return (spa_get_random(ml->ml_num_sublists));
277 /* Lock and return the sublist specified at the given index */
278 multilist_sublist_t *
279 multilist_sublist_lock(multilist_t *ml, unsigned int sublist_idx)
281 multilist_sublist_t *mls;
283 ASSERT3U(sublist_idx, <, ml->ml_num_sublists);
284 mls = &ml->ml_sublists[sublist_idx];
285 mutex_enter(&mls->mls_lock);
287 return (mls);
290 /* Lock and return the sublist that would be used to store the specified obj */
291 multilist_sublist_t *
292 multilist_sublist_lock_obj(multilist_t *ml, void *obj)
294 return (multilist_sublist_lock(ml, ml->ml_index_func(ml, obj)));
297 void
298 multilist_sublist_unlock(multilist_sublist_t *mls)
300 mutex_exit(&mls->mls_lock);
304 * We're allowing any object to be inserted into this specific sublist,
305 * but this can lead to trouble if multilist_remove() is called to
306 * remove this object. Specifically, if calling ml_index_func on this
307 * object returns an index for sublist different than what is passed as
308 * a parameter here, any call to multilist_remove() with this newly
309 * inserted object is undefined! (the call to multilist_remove() will
310 * remove the object from a list that it isn't contained in)
312 void
313 multilist_sublist_insert_head(multilist_sublist_t *mls, void *obj)
315 ASSERT(MUTEX_HELD(&mls->mls_lock));
316 list_insert_head(&mls->mls_list, obj);
319 /* please see comment above multilist_sublist_insert_head */
320 void
321 multilist_sublist_insert_tail(multilist_sublist_t *mls, void *obj)
323 ASSERT(MUTEX_HELD(&mls->mls_lock));
324 list_insert_tail(&mls->mls_list, obj);
328 * Move the object one element forward in the list.
330 * This function will move the given object forward in the list (towards
331 * the head) by one object. So, in essence, it will swap its position in
332 * the list with its "prev" pointer. If the given object is already at the
333 * head of the list, it cannot be moved forward any more than it already
334 * is, so no action is taken.
336 * NOTE: This function **must not** remove any object from the list other
337 * than the object given as the parameter. This is relied upon in
338 * arc_evict_state_impl().
340 void
341 multilist_sublist_move_forward(multilist_sublist_t *mls, void *obj)
343 void *prev = list_prev(&mls->mls_list, obj);
345 ASSERT(MUTEX_HELD(&mls->mls_lock));
346 ASSERT(!list_is_empty(&mls->mls_list));
348 /* 'obj' must be at the head of the list, nothing to do */
349 if (prev == NULL)
350 return;
352 list_remove(&mls->mls_list, obj);
353 list_insert_before(&mls->mls_list, prev, obj);
356 void
357 multilist_sublist_remove(multilist_sublist_t *mls, void *obj)
359 ASSERT(MUTEX_HELD(&mls->mls_lock));
360 list_remove(&mls->mls_list, obj);
363 void *
364 multilist_sublist_head(multilist_sublist_t *mls)
366 ASSERT(MUTEX_HELD(&mls->mls_lock));
367 return (list_head(&mls->mls_list));
370 void *
371 multilist_sublist_tail(multilist_sublist_t *mls)
373 ASSERT(MUTEX_HELD(&mls->mls_lock));
374 return (list_tail(&mls->mls_list));
377 void *
378 multilist_sublist_next(multilist_sublist_t *mls, void *obj)
380 ASSERT(MUTEX_HELD(&mls->mls_lock));
381 return (list_next(&mls->mls_list, obj));
384 void *
385 multilist_sublist_prev(multilist_sublist_t *mls, void *obj)
387 ASSERT(MUTEX_HELD(&mls->mls_lock));
388 return (list_prev(&mls->mls_list, obj));
391 void
392 multilist_link_init(multilist_node_t *link)
394 list_link_init(link);
398 multilist_link_active(multilist_node_t *link)
400 return (list_link_active(link));