1 /* SPDX-License-Identifier: GPL-2.0-or-later */
3 #include "qemu/osdep.h"
4 #include "qemu/interval-tree.h"
5 #include "qemu/atomic.h"
10 * For now, don't expose Linux Red-Black Trees separately, but retain the
11 * separate type definitions to keep the implementation sane, and allow
12 * the possibility of separating them later.
14 * Derived from include/linux/rbtree_augmented.h and its dependencies.
18 * red-black trees properties: https://en.wikipedia.org/wiki/Rbtree
20 * 1) A node is either red or black
21 * 2) The root is black
22 * 3) All leaves (NULL) are black
23 * 4) Both children of every red node are black
24 * 5) Every simple path from root to leaves contains the same number
27 * 4 and 5 give the O(log n) guarantee, since 4 implies you cannot have two
28 * consecutive red nodes in a path and every red node is therefore followed by
29 * a black. So if B is the number of black nodes on every simple path (as per
30 * 5), then the longest possible path due to 4 is 2B.
32 * We shall indicate color with case, where black nodes are uppercase and red
33 * nodes will be lowercase. Unknown color nodes shall be drawn as red within
34 * parentheses and have some accompanying text comment.
36 * Notes on lockless lookups:
38 * All stores to the tree structure (rb_left and rb_right) must be done using
39 * WRITE_ONCE [qatomic_set for QEMU]. And we must not inadvertently cause
40 * (temporary) loops in the tree structure as seen in program order.
42 * These two requirements will allow lockless iteration of the tree -- not
43 * correct iteration mind you, tree rotations are not atomic so a lookup might
44 * miss entire subtrees.
46 * But they do guarantee that any such traversal will only see valid elements
47 * and that it will indeed complete -- does not get stuck in a loop.
49 * It also guarantees that if the lookup returns an element it is the 'correct'
50 * one. But not returning an element does _NOT_ mean it's not present.
59 typedef struct RBAugmentCallbacks
{
60 void (*propagate
)(RBNode
*node
, RBNode
*stop
);
61 void (*copy
)(RBNode
*old
, RBNode
*new);
62 void (*rotate
)(RBNode
*old
, RBNode
*new);
65 static inline uintptr_t rb_pc(const RBNode
*n
)
67 return qatomic_read(&n
->rb_parent_color
);
70 static inline void rb_set_pc(RBNode
*n
, uintptr_t pc
)
72 qatomic_set(&n
->rb_parent_color
, pc
);
75 static inline RBNode
*pc_parent(uintptr_t pc
)
77 return (RBNode
*)(pc
& ~1);
80 static inline RBNode
*rb_parent(const RBNode
*n
)
82 return pc_parent(rb_pc(n
));
85 static inline RBNode
*rb_red_parent(const RBNode
*n
)
87 return (RBNode
*)rb_pc(n
);
90 static inline RBColor
pc_color(uintptr_t pc
)
92 return (RBColor
)(pc
& 1);
95 static inline bool pc_is_red(uintptr_t pc
)
97 return pc_color(pc
) == RB_RED
;
/* True if the parent/color word @pc encodes a black node. */
static inline bool pc_is_black(uintptr_t pc)
{
    return !pc_is_red(pc);
}
105 static inline RBColor
rb_color(const RBNode
*n
)
107 return pc_color(rb_pc(n
));
110 static inline bool rb_is_red(const RBNode
*n
)
112 return pc_is_red(rb_pc(n
));
115 static inline bool rb_is_black(const RBNode
*n
)
117 return pc_is_black(rb_pc(n
));
120 static inline void rb_set_black(RBNode
*n
)
122 rb_set_pc(n
, rb_pc(n
) | RB_BLACK
);
125 static inline void rb_set_parent_color(RBNode
*n
, RBNode
*p
, RBColor color
)
127 rb_set_pc(n
, (uintptr_t)p
| color
);
130 static inline void rb_set_parent(RBNode
*n
, RBNode
*p
)
132 rb_set_parent_color(n
, p
, rb_color(n
));
135 static inline void rb_link_node(RBNode
*node
, RBNode
*parent
, RBNode
**rb_link
)
137 node
->rb_parent_color
= (uintptr_t)parent
;
138 node
->rb_left
= node
->rb_right
= NULL
;
141 * Ensure that node is initialized before insertion,
142 * as viewed by a concurrent search.
144 qatomic_set_mb(rb_link
, node
);
147 static RBNode
*rb_next(RBNode
*node
)
151 /* OMIT: if empty node, return null. */
154 * If we have a right-hand child, go down and then left as far as we can.
156 if (node
->rb_right
) {
157 node
= node
->rb_right
;
158 while (node
->rb_left
) {
159 node
= node
->rb_left
;
165 * No right-hand children. Everything down and left is smaller than us,
166 * so any 'next' node must be in the general direction of our parent.
167 * Go up the tree; any time the ancestor is a right-hand child of its
168 * parent, keep going up. First time it's a left-hand child of its
169 * parent, said parent is our 'next' node.
171 while ((parent
= rb_parent(node
)) && node
== parent
->rb_right
) {
178 static inline void rb_change_child(RBNode
*old
, RBNode
*new,
179 RBNode
*parent
, RBRoot
*root
)
182 qatomic_set(&root
->rb_node
, new);
183 } else if (parent
->rb_left
== old
) {
184 qatomic_set(&parent
->rb_left
, new);
186 qatomic_set(&parent
->rb_right
, new);
190 static inline void rb_rotate_set_parents(RBNode
*old
, RBNode
*new,
191 RBRoot
*root
, RBColor color
)
193 uintptr_t pc
= rb_pc(old
);
194 RBNode
*parent
= pc_parent(pc
);
197 rb_set_parent_color(old
, new, color
);
198 rb_change_child(old
, new, parent
, root
);
201 static void rb_insert_augmented(RBNode
*node
, RBRoot
*root
,
202 const RBAugmentCallbacks
*augment
)
204 RBNode
*parent
= rb_red_parent(node
), *gparent
, *tmp
;
208 * Loop invariant: node is red.
210 if (unlikely(!parent
)) {
212 * The inserted node is root. Either this is the first node, or
213 * we recursed at Case 1 below and are no longer violating 4).
215 rb_set_parent_color(node
, NULL
, RB_BLACK
);
220 * If there is a black parent, we are done. Otherwise, take some
221 * corrective action as, per 4), we don't want a red root or two
222 * consecutive red nodes.
224 if (rb_is_black(parent
)) {
228 gparent
= rb_red_parent(parent
);
230 tmp
= gparent
->rb_right
;
231 if (parent
!= tmp
) { /* parent == gparent->rb_left */
232 if (tmp
&& rb_is_red(tmp
)) {
234 * Case 1 - node's uncle is red (color flips).
242 * However, since g's parent might be red, and 4) does not
243 * allow this, we need to recurse at g.
245 rb_set_parent_color(tmp
, gparent
, RB_BLACK
);
246 rb_set_parent_color(parent
, gparent
, RB_BLACK
);
248 parent
= rb_parent(node
);
249 rb_set_parent_color(node
, parent
, RB_RED
);
253 tmp
= parent
->rb_right
;
256 * Case 2 - node's uncle is black and node is
257 * the parent's right child (left rotate at parent).
265 * This still leaves us in violation of 4), the
266 * continuation into Case 3 will fix that.
269 qatomic_set(&parent
->rb_right
, tmp
);
270 qatomic_set(&node
->rb_left
, parent
);
272 rb_set_parent_color(tmp
, parent
, RB_BLACK
);
274 rb_set_parent_color(parent
, node
, RB_RED
);
275 augment
->rotate(parent
, node
);
277 tmp
= node
->rb_right
;
281 * Case 3 - node's uncle is black and node is
282 * the parent's left child (right rotate at gparent).
290 qatomic_set(&gparent
->rb_left
, tmp
); /* == parent->rb_right */
291 qatomic_set(&parent
->rb_right
, gparent
);
293 rb_set_parent_color(tmp
, gparent
, RB_BLACK
);
295 rb_rotate_set_parents(gparent
, parent
, root
, RB_RED
);
296 augment
->rotate(gparent
, parent
);
299 tmp
= gparent
->rb_left
;
300 if (tmp
&& rb_is_red(tmp
)) {
301 /* Case 1 - color flips */
302 rb_set_parent_color(tmp
, gparent
, RB_BLACK
);
303 rb_set_parent_color(parent
, gparent
, RB_BLACK
);
305 parent
= rb_parent(node
);
306 rb_set_parent_color(node
, parent
, RB_RED
);
310 tmp
= parent
->rb_left
;
312 /* Case 2 - right rotate at parent */
313 tmp
= node
->rb_right
;
314 qatomic_set(&parent
->rb_left
, tmp
);
315 qatomic_set(&node
->rb_right
, parent
);
317 rb_set_parent_color(tmp
, parent
, RB_BLACK
);
319 rb_set_parent_color(parent
, node
, RB_RED
);
320 augment
->rotate(parent
, node
);
325 /* Case 3 - left rotate at gparent */
326 qatomic_set(&gparent
->rb_right
, tmp
); /* == parent->rb_left */
327 qatomic_set(&parent
->rb_left
, gparent
);
329 rb_set_parent_color(tmp
, gparent
, RB_BLACK
);
331 rb_rotate_set_parents(gparent
, parent
, root
, RB_RED
);
332 augment
->rotate(gparent
, parent
);
338 static void rb_insert_augmented_cached(RBNode
*node
,
339 RBRootLeftCached
*root
, bool newleft
,
340 const RBAugmentCallbacks
*augment
)
343 root
->rb_leftmost
= node
;
345 rb_insert_augmented(node
, &root
->rb_root
, augment
);
348 static void rb_erase_color(RBNode
*parent
, RBRoot
*root
,
349 const RBAugmentCallbacks
*augment
)
351 RBNode
*node
= NULL
, *sibling
, *tmp1
, *tmp2
;
356 * - node is black (or NULL on first iteration)
357 * - node is not the root (parent is not NULL)
358 * - All leaf paths going through parent and node have a
359 * black node count that is 1 lower than other leaf paths.
361 sibling
= parent
->rb_right
;
362 if (node
!= sibling
) { /* node == parent->rb_left */
363 if (rb_is_red(sibling
)) {
365 * Case 1 - left rotate at parent
373 tmp1
= sibling
->rb_left
;
374 qatomic_set(&parent
->rb_right
, tmp1
);
375 qatomic_set(&sibling
->rb_left
, parent
);
376 rb_set_parent_color(tmp1
, parent
, RB_BLACK
);
377 rb_rotate_set_parents(parent
, sibling
, root
, RB_RED
);
378 augment
->rotate(parent
, sibling
);
381 tmp1
= sibling
->rb_right
;
382 if (!tmp1
|| rb_is_black(tmp1
)) {
383 tmp2
= sibling
->rb_left
;
384 if (!tmp2
|| rb_is_black(tmp2
)) {
386 * Case 2 - sibling color flip
387 * (p could be either color here)
395 * This leaves us violating 5) which
396 * can be fixed by flipping p to black
397 * if it was red, or by recursing at p.
398 * p is red when coming from Case 1.
400 rb_set_parent_color(sibling
, parent
, RB_RED
);
401 if (rb_is_red(parent
)) {
402 rb_set_black(parent
);
405 parent
= rb_parent(node
);
413 * Case 3 - right rotate at sibling
414 * (p could be either color here)
424 * Note: p might be red, and then bot
425 * p and sl are red after rotation (which
426 * breaks property 4). This is fixed in
427 * Case 4 (in rb_rotate_set_parents()
428 * which set sl the color of p
429 * and set p RB_BLACK)
439 tmp1
= tmp2
->rb_right
;
440 qatomic_set(&sibling
->rb_left
, tmp1
);
441 qatomic_set(&tmp2
->rb_right
, sibling
);
442 qatomic_set(&parent
->rb_right
, tmp2
);
444 rb_set_parent_color(tmp1
, sibling
, RB_BLACK
);
446 augment
->rotate(sibling
, tmp2
);
451 * Case 4 - left rotate at parent + color flips
452 * (p and sl could be either color here.
453 * After rotation, p becomes black, s acquires
454 * p's color, and sl keeps its color)
462 tmp2
= sibling
->rb_left
;
463 qatomic_set(&parent
->rb_right
, tmp2
);
464 qatomic_set(&sibling
->rb_left
, parent
);
465 rb_set_parent_color(tmp1
, sibling
, RB_BLACK
);
467 rb_set_parent(tmp2
, parent
);
469 rb_rotate_set_parents(parent
, sibling
, root
, RB_BLACK
);
470 augment
->rotate(parent
, sibling
);
473 sibling
= parent
->rb_left
;
474 if (rb_is_red(sibling
)) {
475 /* Case 1 - right rotate at parent */
476 tmp1
= sibling
->rb_right
;
477 qatomic_set(&parent
->rb_left
, tmp1
);
478 qatomic_set(&sibling
->rb_right
, parent
);
479 rb_set_parent_color(tmp1
, parent
, RB_BLACK
);
480 rb_rotate_set_parents(parent
, sibling
, root
, RB_RED
);
481 augment
->rotate(parent
, sibling
);
484 tmp1
= sibling
->rb_left
;
485 if (!tmp1
|| rb_is_black(tmp1
)) {
486 tmp2
= sibling
->rb_right
;
487 if (!tmp2
|| rb_is_black(tmp2
)) {
488 /* Case 2 - sibling color flip */
489 rb_set_parent_color(sibling
, parent
, RB_RED
);
490 if (rb_is_red(parent
)) {
491 rb_set_black(parent
);
494 parent
= rb_parent(node
);
501 /* Case 3 - left rotate at sibling */
502 tmp1
= tmp2
->rb_left
;
503 qatomic_set(&sibling
->rb_right
, tmp1
);
504 qatomic_set(&tmp2
->rb_left
, sibling
);
505 qatomic_set(&parent
->rb_left
, tmp2
);
507 rb_set_parent_color(tmp1
, sibling
, RB_BLACK
);
509 augment
->rotate(sibling
, tmp2
);
513 /* Case 4 - right rotate at parent + color flips */
514 tmp2
= sibling
->rb_right
;
515 qatomic_set(&parent
->rb_left
, tmp2
);
516 qatomic_set(&sibling
->rb_right
, parent
);
517 rb_set_parent_color(tmp1
, sibling
, RB_BLACK
);
519 rb_set_parent(tmp2
, parent
);
521 rb_rotate_set_parents(parent
, sibling
, root
, RB_BLACK
);
522 augment
->rotate(parent
, sibling
);
528 static void rb_erase_augmented(RBNode
*node
, RBRoot
*root
,
529 const RBAugmentCallbacks
*augment
)
531 RBNode
*child
= node
->rb_right
;
532 RBNode
*tmp
= node
->rb_left
;
533 RBNode
*parent
, *rebalance
;
538 * Case 1: node to erase has no more than 1 child (easy!)
540 * Note that if there is one child it must be red due to 5)
541 * and node must be black due to 4). We adjust colors locally
542 * so as to bypass rb_erase_color() later on.
545 parent
= pc_parent(pc
);
546 rb_change_child(node
, child
, parent
, root
);
548 rb_set_pc(child
, pc
);
551 rebalance
= pc_is_black(pc
) ? parent
: NULL
;
555 /* Still case 1, but this time the child is node->rb_left */
557 parent
= pc_parent(pc
);
559 rb_change_child(node
, tmp
, parent
, root
);
563 RBNode
*successor
= child
, *child2
;
564 tmp
= child
->rb_left
;
567 * Case 2: node's successor is its right child
576 child2
= successor
->rb_right
;
578 augment
->copy(node
, successor
);
581 * Case 3: node's successor is leftmost under
582 * node's right child subtree
599 child2
= successor
->rb_right
;
600 qatomic_set(&parent
->rb_left
, child2
);
601 qatomic_set(&successor
->rb_right
, child
);
602 rb_set_parent(child
, successor
);
604 augment
->copy(node
, successor
);
605 augment
->propagate(parent
, successor
);
609 qatomic_set(&successor
->rb_left
, tmp
);
610 rb_set_parent(tmp
, successor
);
614 rb_change_child(node
, successor
, tmp
, root
);
617 rb_set_parent_color(child2
, parent
, RB_BLACK
);
620 rebalance
= rb_is_black(successor
) ? parent
: NULL
;
622 rb_set_pc(successor
, pc
);
626 augment
->propagate(tmp
, NULL
);
629 rb_erase_color(rebalance
, root
, augment
);
633 static void rb_erase_augmented_cached(RBNode
*node
, RBRootLeftCached
*root
,
634 const RBAugmentCallbacks
*augment
)
636 if (root
->rb_leftmost
== node
) {
637 root
->rb_leftmost
= rb_next(node
);
639 rb_erase_augmented(node
, &root
->rb_root
, augment
);
/*
 * Interval trees.
 *
 * Derived from lib/interval_tree.c and its dependencies,
 * especially include/linux/interval_tree_generic.h.
 */

/* Map an embedded RBNode back to its containing IntervalTreeNode. */
#define rb_to_itree(N) container_of(N, IntervalTreeNode, rb)
652 static bool interval_tree_compute_max(IntervalTreeNode
*node
, bool exit
)
654 IntervalTreeNode
*child
;
655 uint64_t max
= node
->last
;
657 if (node
->rb
.rb_left
) {
658 child
= rb_to_itree(node
->rb
.rb_left
);
659 if (child
->subtree_last
> max
) {
660 max
= child
->subtree_last
;
663 if (node
->rb
.rb_right
) {
664 child
= rb_to_itree(node
->rb
.rb_right
);
665 if (child
->subtree_last
> max
) {
666 max
= child
->subtree_last
;
669 if (exit
&& node
->subtree_last
== max
) {
672 node
->subtree_last
= max
;
676 static void interval_tree_propagate(RBNode
*rb
, RBNode
*stop
)
679 IntervalTreeNode
*node
= rb_to_itree(rb
);
680 if (interval_tree_compute_max(node
, true)) {
683 rb
= rb_parent(&node
->rb
);
687 static void interval_tree_copy(RBNode
*rb_old
, RBNode
*rb_new
)
689 IntervalTreeNode
*old
= rb_to_itree(rb_old
);
690 IntervalTreeNode
*new = rb_to_itree(rb_new
);
692 new->subtree_last
= old
->subtree_last
;
695 static void interval_tree_rotate(RBNode
*rb_old
, RBNode
*rb_new
)
697 IntervalTreeNode
*old
= rb_to_itree(rb_old
);
698 IntervalTreeNode
*new = rb_to_itree(rb_new
);
700 new->subtree_last
= old
->subtree_last
;
701 interval_tree_compute_max(old
, false);
704 static const RBAugmentCallbacks interval_tree_augment
= {
705 .propagate
= interval_tree_propagate
,
706 .copy
= interval_tree_copy
,
707 .rotate
= interval_tree_rotate
,
710 /* Insert / remove interval nodes from the tree */
711 void interval_tree_insert(IntervalTreeNode
*node
, IntervalTreeRoot
*root
)
713 RBNode
**link
= &root
->rb_root
.rb_node
, *rb_parent
= NULL
;
714 uint64_t start
= node
->start
, last
= node
->last
;
715 IntervalTreeNode
*parent
;
716 bool leftmost
= true;
720 parent
= rb_to_itree(rb_parent
);
722 if (parent
->subtree_last
< last
) {
723 parent
->subtree_last
= last
;
725 if (start
< parent
->start
) {
726 link
= &parent
->rb
.rb_left
;
728 link
= &parent
->rb
.rb_right
;
733 node
->subtree_last
= last
;
734 rb_link_node(&node
->rb
, rb_parent
, link
);
735 rb_insert_augmented_cached(&node
->rb
, root
, leftmost
,
736 &interval_tree_augment
);
739 void interval_tree_remove(IntervalTreeNode
*node
, IntervalTreeRoot
*root
)
741 rb_erase_augmented_cached(&node
->rb
, root
, &interval_tree_augment
);
745 * Iterate over intervals intersecting [start;last]
747 * Note that a node's interval intersects [start;last] iff:
748 * Cond1: node->start <= last
750 * Cond2: start <= node->last
753 static IntervalTreeNode
*interval_tree_subtree_search(IntervalTreeNode
*node
,
759 * Loop invariant: start <= node->subtree_last
760 * (Cond2 is satisfied by one of the subtree nodes)
762 RBNode
*tmp
= qatomic_read(&node
->rb
.rb_left
);
764 IntervalTreeNode
*left
= rb_to_itree(tmp
);
766 if (start
<= left
->subtree_last
) {
768 * Some nodes in left subtree satisfy Cond2.
769 * Iterate to find the leftmost such node N.
770 * If it also satisfies Cond1, that's the
771 * match we are looking for. Otherwise, there
772 * is no matching interval as nodes to the
773 * right of N can't satisfy Cond1 either.
779 if (node
->start
<= last
) { /* Cond1 */
780 if (start
<= node
->last
) { /* Cond2 */
781 return node
; /* node is leftmost match */
783 tmp
= qatomic_read(&node
->rb
.rb_right
);
785 node
= rb_to_itree(tmp
);
786 if (start
<= node
->subtree_last
) {
791 return NULL
; /* no match */
795 IntervalTreeNode
*interval_tree_iter_first(IntervalTreeRoot
*root
,
796 uint64_t start
, uint64_t last
)
798 IntervalTreeNode
*node
, *leftmost
;
800 if (!root
|| !root
->rb_root
.rb_node
) {
805 * Fastpath range intersection/overlap between A: [a0, a1] and
806 * B: [b0, b1] is given by:
808 * a0 <= b1 && b0 <= a1
810 * ... where A holds the lock range and B holds the smallest
811 * 'start' and largest 'last' in the tree. For the later, we
812 * rely on the root node, which by augmented interval tree
813 * property, holds the largest value in its last-in-subtree.
814 * This allows mitigating some of the tree walk overhead for
815 * for non-intersecting ranges, maintained and consulted in O(1).
817 node
= rb_to_itree(root
->rb_root
.rb_node
);
818 if (node
->subtree_last
< start
) {
822 leftmost
= rb_to_itree(root
->rb_leftmost
);
823 if (leftmost
->start
> last
) {
827 return interval_tree_subtree_search(node
, start
, last
);
830 IntervalTreeNode
*interval_tree_iter_next(IntervalTreeNode
*node
,
831 uint64_t start
, uint64_t last
)
835 rb
= qatomic_read(&node
->rb
.rb_right
);
839 * Cond1: node->start <= last
840 * rb == node->rb.rb_right
842 * First, search right subtree if suitable
845 IntervalTreeNode
*right
= rb_to_itree(rb
);
847 if (start
<= right
->subtree_last
) {
848 return interval_tree_subtree_search(right
, start
, last
);
852 /* Move up the tree until we come from a node's left child */
854 rb
= rb_parent(&node
->rb
);
859 node
= rb_to_itree(rb
);
860 rb
= qatomic_read(&node
->rb
.rb_right
);
861 } while (prev
== rb
);
863 /* Check if the node intersects [start;last] */
864 if (last
< node
->start
) { /* !Cond1 */
867 if (start
<= node
->last
) { /* Cond2 */
873 /* Occasionally useful for calling from within the debugger. */
875 static void debug_interval_tree_int(IntervalTreeNode
*node
,
876 const char *dir
, int level
)
878 printf("%4d %*s %s [%" PRIu64
",%" PRIu64
"] subtree_last:%" PRIu64
"\n",
879 level
, level
+ 1, dir
, rb_is_red(&node
->rb
) ? "r" : "b",
880 node
->start
, node
->last
, node
->subtree_last
);
882 if (node
->rb
.rb_left
) {
883 debug_interval_tree_int(rb_to_itree(node
->rb
.rb_left
), "<", level
+ 1);
885 if (node
->rb
.rb_right
) {
886 debug_interval_tree_int(rb_to_itree(node
->rb
.rb_right
), ">", level
+ 1);
890 void debug_interval_tree(IntervalTreeNode
*node
);
891 void debug_interval_tree(IntervalTreeNode
*node
)
894 debug_interval_tree_int(node
, "*", 0);