transaction handles everywhere
[btrfs-progs-unstable.git] / radix-tree.c
blobbaa25ca1c2acb7e05c7fc30b8d163e6ae063894e
1 /*
2 * Copyright (C) 2001 Momchil Velikov
3 * Portions Copyright (C) 2001 Christoph Hellwig
4 * Copyright (C) 2005 SGI, Christoph Lameter <clameter@sgi.com>
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License as
8 * published by the Free Software Foundation; either version 2, or (at
9 * your option) any later version.
11 * This program is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
21 #include "kerncompat.h"
22 #include "radix-tree.h"
23 #ifdef __KERNEL__
24 #define RADIX_TREE_MAP_SHIFT (CONFIG_BASE_SMALL ? 4 : 6)
25 #else
26 #define RADIX_TREE_MAP_SHIFT 3 /* For more stressful testing */
27 #endif
29 #define RADIX_TREE_MAP_SIZE (1UL << RADIX_TREE_MAP_SHIFT)
30 #define RADIX_TREE_MAP_MASK (RADIX_TREE_MAP_SIZE-1)
32 #define RADIX_TREE_TAG_LONGS \
33 ((RADIX_TREE_MAP_SIZE + BITS_PER_LONG - 1) / BITS_PER_LONG)
35 struct radix_tree_node {
36 unsigned int count;
37 void *slots[RADIX_TREE_MAP_SIZE];
38 unsigned long tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS];
41 struct radix_tree_path {
42 struct radix_tree_node *node;
43 int offset;
46 #define RADIX_TREE_INDEX_BITS (8 /* CHAR_BIT */ * sizeof(unsigned long))
47 #define RADIX_TREE_MAX_PATH (RADIX_TREE_INDEX_BITS/RADIX_TREE_MAP_SHIFT + 2)
49 static unsigned long height_to_maxindex[RADIX_TREE_MAX_PATH] __read_mostly;
52 * Per-cpu pool of preloaded nodes
54 struct radix_tree_preload {
55 int nr;
56 struct radix_tree_node *nodes[RADIX_TREE_MAX_PATH];
58 struct radix_tree_preload radix_tree_preloads = { 0, };
60 static inline gfp_t root_gfp_mask(struct radix_tree_root *root)
62 return root->gfp_mask & __GFP_BITS_MASK;
65 static int internal_nodes = 0;
67 * This assumes that the caller has performed appropriate preallocation, and
68 * that the caller has pinned this thread of control to the current CPU.
70 static struct radix_tree_node *
71 radix_tree_node_alloc(struct radix_tree_root *root)
73 struct radix_tree_node *ret;
74 ret = malloc(sizeof(struct radix_tree_node));
75 if (ret) {
76 memset(ret, 0, sizeof(struct radix_tree_node));
77 internal_nodes++;
79 return ret;
82 static inline void
83 radix_tree_node_free(struct radix_tree_node *node)
85 internal_nodes--;
86 free(node);
90 * Load up this CPU's radix_tree_node buffer with sufficient objects to
91 * ensure that the addition of a single element in the tree cannot fail. On
92 * success, return zero, with preemption disabled. On error, return -ENOMEM
93 * with preemption not disabled.
95 int radix_tree_preload(gfp_t gfp_mask)
97 struct radix_tree_preload *rtp;
98 struct radix_tree_node *node;
99 int ret = -ENOMEM;
101 preempt_disable();
102 rtp = &__get_cpu_var(radix_tree_preloads);
103 while (rtp->nr < ARRAY_SIZE(rtp->nodes)) {
104 preempt_enable();
105 node = radix_tree_node_alloc(NULL);
106 if (node == NULL)
107 goto out;
108 preempt_disable();
109 rtp = &__get_cpu_var(radix_tree_preloads);
110 if (rtp->nr < ARRAY_SIZE(rtp->nodes))
111 rtp->nodes[rtp->nr++] = node;
112 else
113 radix_tree_node_free(node);
115 ret = 0;
116 out:
117 return ret;
120 static inline void tag_set(struct radix_tree_node *node, unsigned int tag,
121 int offset)
123 __set_bit(offset, node->tags[tag]);
126 static inline void tag_clear(struct radix_tree_node *node, unsigned int tag,
127 int offset)
129 __clear_bit(offset, node->tags[tag]);
132 static inline int tag_get(struct radix_tree_node *node, unsigned int tag,
133 int offset)
135 return test_bit(offset, node->tags[tag]);
138 static inline void root_tag_set(struct radix_tree_root *root, unsigned int tag)
140 root->gfp_mask |= (__force gfp_t)(1 << (tag + __GFP_BITS_SHIFT));
144 static inline void root_tag_clear(struct radix_tree_root *root, unsigned int tag)
146 root->gfp_mask &= (__force gfp_t)~(1 << (tag + __GFP_BITS_SHIFT));
149 static inline void root_tag_clear_all(struct radix_tree_root *root)
151 root->gfp_mask &= __GFP_BITS_MASK;
154 static inline int root_tag_get(struct radix_tree_root *root, unsigned int tag)
156 return (__force unsigned)root->gfp_mask & (1 << (tag + __GFP_BITS_SHIFT));
160 * Returns 1 if any slot in the node has this tag set.
161 * Otherwise returns 0.
163 static inline int any_tag_set(struct radix_tree_node *node, unsigned int tag)
165 int idx;
166 for (idx = 0; idx < RADIX_TREE_TAG_LONGS; idx++) {
167 if (node->tags[tag][idx])
168 return 1;
170 return 0;
174 * Return the maximum key which can be store into a
175 * radix tree with height HEIGHT.
177 static inline unsigned long radix_tree_maxindex(unsigned int height)
179 return height_to_maxindex[height];
183 * Extend a radix tree so it can store key @index.
185 static int radix_tree_extend(struct radix_tree_root *root, unsigned long index)
187 struct radix_tree_node *node;
188 unsigned int height;
189 int tag;
191 /* Figure out what the height should be. */
192 height = root->height + 1;
193 while (index > radix_tree_maxindex(height))
194 height++;
196 if (root->rnode == NULL) {
197 root->height = height;
198 goto out;
201 do {
202 if (!(node = radix_tree_node_alloc(root)))
203 return -ENOMEM;
205 /* Increase the height. */
206 node->slots[0] = root->rnode;
208 /* Propagate the aggregated tag info into the new root */
209 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) {
210 if (root_tag_get(root, tag))
211 tag_set(node, tag, 0);
214 node->count = 1;
215 root->rnode = node;
216 root->height++;
217 } while (height > root->height);
218 out:
219 return 0;
223 * radix_tree_insert - insert into a radix tree
224 * @root: radix tree root
225 * @index: index key
226 * @item: item to insert
228 * Insert an item into the radix tree at position @index.
230 int radix_tree_insert(struct radix_tree_root *root,
231 unsigned long index, void *item)
233 struct radix_tree_node *node = NULL, *slot;
234 unsigned int height, shift;
235 int offset;
236 int error;
238 /* Make sure the tree is high enough. */
239 if (index > radix_tree_maxindex(root->height)) {
240 error = radix_tree_extend(root, index);
241 if (error)
242 return error;
245 slot = root->rnode;
246 height = root->height;
247 shift = (height-1) * RADIX_TREE_MAP_SHIFT;
249 offset = 0; /* uninitialised var warning */
250 while (height > 0) {
251 if (slot == NULL) {
252 /* Have to add a child node. */
253 if (!(slot = radix_tree_node_alloc(root)))
254 return -ENOMEM;
255 if (node) {
256 node->slots[offset] = slot;
257 node->count++;
258 } else
259 root->rnode = slot;
262 /* Go a level down */
263 offset = (index >> shift) & RADIX_TREE_MAP_MASK;
264 node = slot;
265 slot = node->slots[offset];
266 shift -= RADIX_TREE_MAP_SHIFT;
267 height--;
270 if (slot != NULL)
271 return -EEXIST;
273 if (node) {
274 node->count++;
275 node->slots[offset] = item;
276 BUG_ON(tag_get(node, 0, offset));
277 BUG_ON(tag_get(node, 1, offset));
278 } else {
279 root->rnode = item;
280 BUG_ON(root_tag_get(root, 0));
281 BUG_ON(root_tag_get(root, 1));
284 return 0;
287 static inline void **__lookup_slot(struct radix_tree_root *root,
288 unsigned long index)
290 unsigned int height, shift;
291 struct radix_tree_node **slot;
293 height = root->height;
295 if (index > radix_tree_maxindex(height))
296 return NULL;
298 if (height == 0 && root->rnode)
299 return (void **)&root->rnode;
301 shift = (height-1) * RADIX_TREE_MAP_SHIFT;
302 slot = &root->rnode;
304 while (height > 0) {
305 if (*slot == NULL)
306 return NULL;
308 slot = (struct radix_tree_node **)
309 ((*slot)->slots +
310 ((index >> shift) & RADIX_TREE_MAP_MASK));
311 shift -= RADIX_TREE_MAP_SHIFT;
312 height--;
315 return (void **)slot;
319 * radix_tree_lookup_slot - lookup a slot in a radix tree
320 * @root: radix tree root
321 * @index: index key
323 * Lookup the slot corresponding to the position @index in the radix tree
324 * @root. This is useful for update-if-exists operations.
326 void **radix_tree_lookup_slot(struct radix_tree_root *root, unsigned long index)
328 return __lookup_slot(root, index);
332 * radix_tree_lookup - perform lookup operation on a radix tree
333 * @root: radix tree root
334 * @index: index key
336 * Lookup the item at the position @index in the radix tree @root.
338 void *radix_tree_lookup(struct radix_tree_root *root, unsigned long index)
340 void **slot;
342 slot = __lookup_slot(root, index);
343 return slot != NULL ? *slot : NULL;
347 * radix_tree_tag_set - set a tag on a radix tree node
348 * @root: radix tree root
349 * @index: index key
350 * @tag: tag index
352 * Set the search tag (which must be < RADIX_TREE_MAX_TAGS)
353 * corresponding to @index in the radix tree. From
354 * the root all the way down to the leaf node.
356 * Returns the address of the tagged item. Setting a tag on a not-present
357 * item is a bug.
359 void *radix_tree_tag_set(struct radix_tree_root *root,
360 unsigned long index, unsigned int tag)
362 unsigned int height, shift;
363 struct radix_tree_node *slot;
365 height = root->height;
366 BUG_ON(index > radix_tree_maxindex(height));
368 slot = root->rnode;
369 shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
371 while (height > 0) {
372 int offset;
374 offset = (index >> shift) & RADIX_TREE_MAP_MASK;
375 if (!tag_get(slot, tag, offset))
376 tag_set(slot, tag, offset);
377 slot = slot->slots[offset];
378 BUG_ON(slot == NULL);
379 shift -= RADIX_TREE_MAP_SHIFT;
380 height--;
383 /* set the root's tag bit */
384 if (slot && !root_tag_get(root, tag))
385 root_tag_set(root, tag);
387 return slot;
391 * radix_tree_tag_clear - clear a tag on a radix tree node
392 * @root: radix tree root
393 * @index: index key
394 * @tag: tag index
396 * Clear the search tag (which must be < RADIX_TREE_MAX_TAGS)
397 * corresponding to @index in the radix tree. If
398 * this causes the leaf node to have no tags set then clear the tag in the
399 * next-to-leaf node, etc.
401 * Returns the address of the tagged item on success, else NULL. ie:
402 * has the same return value and semantics as radix_tree_lookup().
404 void *radix_tree_tag_clear(struct radix_tree_root *root,
405 unsigned long index, unsigned int tag)
407 struct radix_tree_path path[RADIX_TREE_MAX_PATH], *pathp = path;
408 struct radix_tree_node *slot = NULL;
409 unsigned int height, shift;
411 height = root->height;
412 if (index > radix_tree_maxindex(height))
413 goto out;
415 shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
416 pathp->node = NULL;
417 slot = root->rnode;
419 while (height > 0) {
420 int offset;
422 if (slot == NULL)
423 goto out;
425 offset = (index >> shift) & RADIX_TREE_MAP_MASK;
426 pathp[1].offset = offset;
427 pathp[1].node = slot;
428 slot = slot->slots[offset];
429 pathp++;
430 shift -= RADIX_TREE_MAP_SHIFT;
431 height--;
434 if (slot == NULL)
435 goto out;
437 while (pathp->node) {
438 if (!tag_get(pathp->node, tag, pathp->offset))
439 goto out;
440 tag_clear(pathp->node, tag, pathp->offset);
441 if (any_tag_set(pathp->node, tag))
442 goto out;
443 pathp--;
446 /* clear the root's tag bit */
447 if (root_tag_get(root, tag))
448 root_tag_clear(root, tag);
450 out:
451 return slot;
454 #ifndef __KERNEL__ /* Only the test harness uses this at present */
456 * radix_tree_tag_get - get a tag on a radix tree node
457 * @root: radix tree root
458 * @index: index key
459 * @tag: tag index (< RADIX_TREE_MAX_TAGS)
461 * Return values:
463 * 0: tag not present or not set
464 * 1: tag set
466 int radix_tree_tag_get(struct radix_tree_root *root,
467 unsigned long index, unsigned int tag)
469 unsigned int height, shift;
470 struct radix_tree_node *slot;
471 int saw_unset_tag = 0;
473 height = root->height;
474 if (index > radix_tree_maxindex(height))
475 return 0;
477 /* check the root's tag bit */
478 if (!root_tag_get(root, tag))
479 return 0;
481 if (height == 0)
482 return 1;
484 shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
485 slot = root->rnode;
487 for ( ; ; ) {
488 int offset;
490 if (slot == NULL)
491 return 0;
493 offset = (index >> shift) & RADIX_TREE_MAP_MASK;
496 * This is just a debug check. Later, we can bale as soon as
497 * we see an unset tag.
499 if (!tag_get(slot, tag, offset))
500 saw_unset_tag = 1;
501 if (height == 1) {
502 int ret = tag_get(slot, tag, offset);
504 BUG_ON(ret && saw_unset_tag);
505 return !!ret;
507 slot = slot->slots[offset];
508 shift -= RADIX_TREE_MAP_SHIFT;
509 height--;
512 #endif
514 static unsigned int
515 __lookup(struct radix_tree_root *root, void **results, unsigned long index,
516 unsigned int max_items, unsigned long *next_index)
518 unsigned int nr_found = 0;
519 unsigned int shift, height;
520 struct radix_tree_node *slot;
521 unsigned long i;
523 height = root->height;
524 if (height == 0) {
525 if (root->rnode && index == 0)
526 results[nr_found++] = root->rnode;
527 goto out;
530 shift = (height-1) * RADIX_TREE_MAP_SHIFT;
531 slot = root->rnode;
533 for ( ; height > 1; height--) {
535 for (i = (index >> shift) & RADIX_TREE_MAP_MASK ;
536 i < RADIX_TREE_MAP_SIZE; i++) {
537 if (slot->slots[i] != NULL)
538 break;
539 index &= ~((1UL << shift) - 1);
540 index += 1UL << shift;
541 if (index == 0)
542 goto out; /* 32-bit wraparound */
544 if (i == RADIX_TREE_MAP_SIZE)
545 goto out;
547 shift -= RADIX_TREE_MAP_SHIFT;
548 slot = slot->slots[i];
551 /* Bottom level: grab some items */
552 for (i = index & RADIX_TREE_MAP_MASK; i < RADIX_TREE_MAP_SIZE; i++) {
553 index++;
554 if (slot->slots[i]) {
555 results[nr_found++] = slot->slots[i];
556 if (nr_found == max_items)
557 goto out;
560 out:
561 *next_index = index;
562 return nr_found;
566 * radix_tree_gang_lookup - perform multiple lookup on a radix tree
567 * @root: radix tree root
568 * @results: where the results of the lookup are placed
569 * @first_index: start the lookup from this key
570 * @max_items: place up to this many items at *results
572 * Performs an index-ascending scan of the tree for present items. Places
573 * them at *@results and returns the number of items which were placed at
574 * *@results.
576 * The implementation is naive.
578 unsigned int
579 radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
580 unsigned long first_index, unsigned int max_items)
582 const unsigned long max_index = radix_tree_maxindex(root->height);
583 unsigned long cur_index = first_index;
584 unsigned int ret = 0;
586 while (ret < max_items) {
587 unsigned int nr_found;
588 unsigned long next_index; /* Index of next search */
590 if (cur_index > max_index)
591 break;
592 nr_found = __lookup(root, results + ret, cur_index,
593 max_items - ret, &next_index);
594 ret += nr_found;
595 if (next_index == 0)
596 break;
597 cur_index = next_index;
599 return ret;
603 * FIXME: the two tag_get()s here should use find_next_bit() instead of
604 * open-coding the search.
606 static unsigned int
607 __lookup_tag(struct radix_tree_root *root, void **results, unsigned long index,
608 unsigned int max_items, unsigned long *next_index, unsigned int tag)
610 unsigned int nr_found = 0;
611 unsigned int shift;
612 unsigned int height = root->height;
613 struct radix_tree_node *slot;
615 if (height == 0) {
616 if (root->rnode && index == 0)
617 results[nr_found++] = root->rnode;
618 goto out;
621 shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
622 slot = root->rnode;
624 do {
625 unsigned long i = (index >> shift) & RADIX_TREE_MAP_MASK;
627 for ( ; i < RADIX_TREE_MAP_SIZE; i++) {
628 if (tag_get(slot, tag, i)) {
629 BUG_ON(slot->slots[i] == NULL);
630 break;
632 index &= ~((1UL << shift) - 1);
633 index += 1UL << shift;
634 if (index == 0)
635 goto out; /* 32-bit wraparound */
637 if (i == RADIX_TREE_MAP_SIZE)
638 goto out;
639 height--;
640 if (height == 0) { /* Bottom level: grab some items */
641 unsigned long j = index & RADIX_TREE_MAP_MASK;
643 for ( ; j < RADIX_TREE_MAP_SIZE; j++) {
644 index++;
645 if (tag_get(slot, tag, j)) {
646 BUG_ON(slot->slots[j] == NULL);
647 results[nr_found++] = slot->slots[j];
648 if (nr_found == max_items)
649 goto out;
653 shift -= RADIX_TREE_MAP_SHIFT;
654 slot = slot->slots[i];
655 } while (height > 0);
656 out:
657 *next_index = index;
658 return nr_found;
662 * radix_tree_gang_lookup_tag - perform multiple lookup on a radix tree
663 * based on a tag
664 * @root: radix tree root
665 * @results: where the results of the lookup are placed
666 * @first_index: start the lookup from this key
667 * @max_items: place up to this many items at *results
668 * @tag: the tag index (< RADIX_TREE_MAX_TAGS)
670 * Performs an index-ascending scan of the tree for present items which
671 * have the tag indexed by @tag set. Places the items at *@results and
672 * returns the number of items which were placed at *@results.
674 unsigned int
675 radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results,
676 unsigned long first_index, unsigned int max_items,
677 unsigned int tag)
679 const unsigned long max_index = radix_tree_maxindex(root->height);
680 unsigned long cur_index = first_index;
681 unsigned int ret = 0;
683 /* check the root's tag bit */
684 if (!root_tag_get(root, tag))
685 return 0;
687 while (ret < max_items) {
688 unsigned int nr_found;
689 unsigned long next_index; /* Index of next search */
691 if (cur_index > max_index)
692 break;
693 nr_found = __lookup_tag(root, results + ret, cur_index,
694 max_items - ret, &next_index, tag);
695 ret += nr_found;
696 if (next_index == 0)
697 break;
698 cur_index = next_index;
700 return ret;
704 * radix_tree_shrink - shrink height of a radix tree to minimal
705 * @root radix tree root
707 static inline void radix_tree_shrink(struct radix_tree_root *root)
709 /* try to shrink tree height */
710 while (root->height > 0 &&
711 root->rnode->count == 1 &&
712 root->rnode->slots[0]) {
713 struct radix_tree_node *to_free = root->rnode;
715 root->rnode = to_free->slots[0];
716 root->height--;
717 /* must only free zeroed nodes into the slab */
718 tag_clear(to_free, 0, 0);
719 tag_clear(to_free, 1, 0);
720 to_free->slots[0] = NULL;
721 to_free->count = 0;
722 radix_tree_node_free(to_free);
727 * radix_tree_delete - delete an item from a radix tree
728 * @root: radix tree root
729 * @index: index key
731 * Remove the item at @index from the radix tree rooted at @root.
733 * Returns the address of the deleted item, or NULL if it was not present.
735 void *radix_tree_delete(struct radix_tree_root *root, unsigned long index)
737 struct radix_tree_path path[RADIX_TREE_MAX_PATH], *pathp = path;
738 struct radix_tree_node *slot = NULL;
739 unsigned int height, shift;
740 int tag;
741 int offset;
743 height = root->height;
744 if (index > radix_tree_maxindex(height))
745 goto out;
747 slot = root->rnode;
748 if (height == 0 && root->rnode) {
749 root_tag_clear_all(root);
750 root->rnode = NULL;
751 goto out;
754 shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
755 pathp->node = NULL;
757 do {
758 if (slot == NULL)
759 goto out;
761 pathp++;
762 offset = (index >> shift) & RADIX_TREE_MAP_MASK;
763 pathp->offset = offset;
764 pathp->node = slot;
765 slot = slot->slots[offset];
766 shift -= RADIX_TREE_MAP_SHIFT;
767 height--;
768 } while (height > 0);
770 if (slot == NULL)
771 goto out;
774 * Clear all tags associated with the just-deleted item
776 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) {
777 if (tag_get(pathp->node, tag, pathp->offset))
778 radix_tree_tag_clear(root, index, tag);
781 /* Now free the nodes we do not need anymore */
782 while (pathp->node) {
783 pathp->node->slots[pathp->offset] = NULL;
784 pathp->node->count--;
786 if (pathp->node->count) {
787 if (pathp->node == root->rnode)
788 radix_tree_shrink(root);
789 goto out;
792 /* Node with zero slots in use so free it */
793 radix_tree_node_free(pathp->node);
795 pathp--;
797 root_tag_clear_all(root);
798 root->height = 0;
799 root->rnode = NULL;
801 out:
802 return slot;
806 * radix_tree_tagged - test whether any items in the tree are tagged
807 * @root: radix tree root
808 * @tag: tag to test
810 int radix_tree_tagged(struct radix_tree_root *root, unsigned int tag)
812 return root_tag_get(root, tag);
815 static unsigned long __maxindex(unsigned int height)
817 unsigned int tmp = height * RADIX_TREE_MAP_SHIFT;
818 unsigned long index = (~0UL >> (RADIX_TREE_INDEX_BITS - tmp - 1)) >> 1;
820 if (tmp >= RADIX_TREE_INDEX_BITS)
821 index = ~0UL;
822 return index;
825 static void radix_tree_init_maxindex(void)
827 unsigned int i;
829 for (i = 0; i < ARRAY_SIZE(height_to_maxindex); i++)
830 height_to_maxindex[i] = __maxindex(i);
833 void radix_tree_init(void)
835 radix_tree_init_maxindex();