1 /* SPDX-License-Identifier: GPL-2.0-or-later */
3 #include "qemu/osdep.h"
4 #include "qemu/interval-tree.h"
5 #include "qemu/atomic.h"
/*
 * For now, don't expose Linux Red-Black Trees separately, but retain the
 * separate type definitions to keep the implementation sane, and allow
 * the possibility of separating them later.
 *
 * Derived from include/linux/rbtree_augmented.h and its dependencies.
 */

/*
 * red-black trees properties: https://en.wikipedia.org/wiki/Rbtree
 *
 *  1) A node is either red or black
 *  2) The root is black
 *  3) All leaves (NULL) are black
 *  4) Both children of every red node are black
 *  5) Every simple path from root to leaves contains the same number
 *     of black nodes.
 *
 *  4 and 5 give the O(log n) guarantee, since 4 implies you cannot have two
 *  consecutive red nodes in a path and every red node is therefore followed by
 *  a black. So if B is the number of black nodes on every simple path (as per
 *  5), then the longest possible path due to 4 is 2B.
 *
 *  We shall indicate color with case, where black nodes are uppercase and red
 *  nodes will be lowercase. Unknown color nodes shall be drawn as red within
 *  parentheses and have some accompanying text comment.
 *
 * Notes on lockless lookups:
 *
 * All stores to the tree structure (rb_left and rb_right) must be done using
 * WRITE_ONCE [qatomic_set for QEMU]. And we must not inadvertently cause
 * (temporary) loops in the tree structure as seen in program order.
 *
 * These two requirements will allow lockless iteration of the tree -- not
 * correct iteration mind you, tree rotations are not atomic so a lookup might
 * miss entire subtrees.
 *
 * But they do guarantee that any such traversal will only see valid elements
 * and that it will indeed complete -- does not get stuck in a loop.
 *
 * It also guarantees that if the lookup returns an element it is the 'correct'
 * one. But not returning an element does _NOT_ mean it's not present.
 *
 * NOTE:
 *
 * Stores to __rb_parent_color are not important for simple lookups so those
 * are left undone as of now. Nor did I check for loops involving parent
 * pointers.
 */
65 typedef struct RBAugmentCallbacks
{
66 void (*propagate
)(RBNode
*node
, RBNode
*stop
);
67 void (*copy
)(RBNode
*old
, RBNode
*new);
68 void (*rotate
)(RBNode
*old
, RBNode
*new);
71 static inline RBNode
*rb_parent(const RBNode
*n
)
73 return (RBNode
*)(n
->rb_parent_color
& ~1);
76 static inline RBNode
*rb_red_parent(const RBNode
*n
)
78 return (RBNode
*)n
->rb_parent_color
;
81 static inline RBColor
pc_color(uintptr_t pc
)
83 return (RBColor
)(pc
& 1);
86 static inline bool pc_is_red(uintptr_t pc
)
88 return pc_color(pc
) == RB_RED
;
/* True if the packed parent-and-color word encodes a black node. */
static inline bool pc_is_black(uintptr_t pc)
{
    return !pc_is_red(pc);
}
96 static inline RBColor
rb_color(const RBNode
*n
)
98 return pc_color(n
->rb_parent_color
);
101 static inline bool rb_is_red(const RBNode
*n
)
103 return pc_is_red(n
->rb_parent_color
);
106 static inline bool rb_is_black(const RBNode
*n
)
108 return pc_is_black(n
->rb_parent_color
);
111 static inline void rb_set_black(RBNode
*n
)
113 n
->rb_parent_color
|= RB_BLACK
;
116 static inline void rb_set_parent_color(RBNode
*n
, RBNode
*p
, RBColor color
)
118 n
->rb_parent_color
= (uintptr_t)p
| color
;
121 static inline void rb_set_parent(RBNode
*n
, RBNode
*p
)
123 rb_set_parent_color(n
, p
, rb_color(n
));
126 static inline void rb_link_node(RBNode
*node
, RBNode
*parent
, RBNode
**rb_link
)
128 node
->rb_parent_color
= (uintptr_t)parent
;
129 node
->rb_left
= node
->rb_right
= NULL
;
131 qatomic_set(rb_link
, node
);
134 static RBNode
*rb_next(RBNode
*node
)
138 /* OMIT: if empty node, return null. */
141 * If we have a right-hand child, go down and then left as far as we can.
143 if (node
->rb_right
) {
144 node
= node
->rb_right
;
145 while (node
->rb_left
) {
146 node
= node
->rb_left
;
152 * No right-hand children. Everything down and left is smaller than us,
153 * so any 'next' node must be in the general direction of our parent.
154 * Go up the tree; any time the ancestor is a right-hand child of its
155 * parent, keep going up. First time it's a left-hand child of its
156 * parent, said parent is our 'next' node.
158 while ((parent
= rb_parent(node
)) && node
== parent
->rb_right
) {
165 static inline void rb_change_child(RBNode
*old
, RBNode
*new,
166 RBNode
*parent
, RBRoot
*root
)
169 qatomic_set(&root
->rb_node
, new);
170 } else if (parent
->rb_left
== old
) {
171 qatomic_set(&parent
->rb_left
, new);
173 qatomic_set(&parent
->rb_right
, new);
177 static inline void rb_rotate_set_parents(RBNode
*old
, RBNode
*new,
178 RBRoot
*root
, RBColor color
)
180 RBNode
*parent
= rb_parent(old
);
182 new->rb_parent_color
= old
->rb_parent_color
;
183 rb_set_parent_color(old
, new, color
);
184 rb_change_child(old
, new, parent
, root
);
187 static void rb_insert_augmented(RBNode
*node
, RBRoot
*root
,
188 const RBAugmentCallbacks
*augment
)
190 RBNode
*parent
= rb_red_parent(node
), *gparent
, *tmp
;
194 * Loop invariant: node is red.
196 if (unlikely(!parent
)) {
198 * The inserted node is root. Either this is the first node, or
199 * we recursed at Case 1 below and are no longer violating 4).
201 rb_set_parent_color(node
, NULL
, RB_BLACK
);
206 * If there is a black parent, we are done. Otherwise, take some
207 * corrective action as, per 4), we don't want a red root or two
208 * consecutive red nodes.
210 if (rb_is_black(parent
)) {
214 gparent
= rb_red_parent(parent
);
216 tmp
= gparent
->rb_right
;
217 if (parent
!= tmp
) { /* parent == gparent->rb_left */
218 if (tmp
&& rb_is_red(tmp
)) {
220 * Case 1 - node's uncle is red (color flips).
228 * However, since g's parent might be red, and 4) does not
229 * allow this, we need to recurse at g.
231 rb_set_parent_color(tmp
, gparent
, RB_BLACK
);
232 rb_set_parent_color(parent
, gparent
, RB_BLACK
);
234 parent
= rb_parent(node
);
235 rb_set_parent_color(node
, parent
, RB_RED
);
239 tmp
= parent
->rb_right
;
242 * Case 2 - node's uncle is black and node is
243 * the parent's right child (left rotate at parent).
251 * This still leaves us in violation of 4), the
252 * continuation into Case 3 will fix that.
255 qatomic_set(&parent
->rb_right
, tmp
);
256 qatomic_set(&node
->rb_left
, parent
);
258 rb_set_parent_color(tmp
, parent
, RB_BLACK
);
260 rb_set_parent_color(parent
, node
, RB_RED
);
261 augment
->rotate(parent
, node
);
263 tmp
= node
->rb_right
;
267 * Case 3 - node's uncle is black and node is
268 * the parent's left child (right rotate at gparent).
276 qatomic_set(&gparent
->rb_left
, tmp
); /* == parent->rb_right */
277 qatomic_set(&parent
->rb_right
, gparent
);
279 rb_set_parent_color(tmp
, gparent
, RB_BLACK
);
281 rb_rotate_set_parents(gparent
, parent
, root
, RB_RED
);
282 augment
->rotate(gparent
, parent
);
285 tmp
= gparent
->rb_left
;
286 if (tmp
&& rb_is_red(tmp
)) {
287 /* Case 1 - color flips */
288 rb_set_parent_color(tmp
, gparent
, RB_BLACK
);
289 rb_set_parent_color(parent
, gparent
, RB_BLACK
);
291 parent
= rb_parent(node
);
292 rb_set_parent_color(node
, parent
, RB_RED
);
296 tmp
= parent
->rb_left
;
298 /* Case 2 - right rotate at parent */
299 tmp
= node
->rb_right
;
300 qatomic_set(&parent
->rb_left
, tmp
);
301 qatomic_set(&node
->rb_right
, parent
);
303 rb_set_parent_color(tmp
, parent
, RB_BLACK
);
305 rb_set_parent_color(parent
, node
, RB_RED
);
306 augment
->rotate(parent
, node
);
311 /* Case 3 - left rotate at gparent */
312 qatomic_set(&gparent
->rb_right
, tmp
); /* == parent->rb_left */
313 qatomic_set(&parent
->rb_left
, gparent
);
315 rb_set_parent_color(tmp
, gparent
, RB_BLACK
);
317 rb_rotate_set_parents(gparent
, parent
, root
, RB_RED
);
318 augment
->rotate(gparent
, parent
);
324 static void rb_insert_augmented_cached(RBNode
*node
,
325 RBRootLeftCached
*root
, bool newleft
,
326 const RBAugmentCallbacks
*augment
)
329 root
->rb_leftmost
= node
;
331 rb_insert_augmented(node
, &root
->rb_root
, augment
);
334 static void rb_erase_color(RBNode
*parent
, RBRoot
*root
,
335 const RBAugmentCallbacks
*augment
)
337 RBNode
*node
= NULL
, *sibling
, *tmp1
, *tmp2
;
342 * - node is black (or NULL on first iteration)
343 * - node is not the root (parent is not NULL)
344 * - All leaf paths going through parent and node have a
345 * black node count that is 1 lower than other leaf paths.
347 sibling
= parent
->rb_right
;
348 if (node
!= sibling
) { /* node == parent->rb_left */
349 if (rb_is_red(sibling
)) {
351 * Case 1 - left rotate at parent
359 tmp1
= sibling
->rb_left
;
360 qatomic_set(&parent
->rb_right
, tmp1
);
361 qatomic_set(&sibling
->rb_left
, parent
);
362 rb_set_parent_color(tmp1
, parent
, RB_BLACK
);
363 rb_rotate_set_parents(parent
, sibling
, root
, RB_RED
);
364 augment
->rotate(parent
, sibling
);
367 tmp1
= sibling
->rb_right
;
368 if (!tmp1
|| rb_is_black(tmp1
)) {
369 tmp2
= sibling
->rb_left
;
370 if (!tmp2
|| rb_is_black(tmp2
)) {
372 * Case 2 - sibling color flip
373 * (p could be either color here)
381 * This leaves us violating 5) which
382 * can be fixed by flipping p to black
383 * if it was red, or by recursing at p.
384 * p is red when coming from Case 1.
386 rb_set_parent_color(sibling
, parent
, RB_RED
);
387 if (rb_is_red(parent
)) {
388 rb_set_black(parent
);
391 parent
= rb_parent(node
);
399 * Case 3 - right rotate at sibling
400 * (p could be either color here)
410 * Note: p might be red, and then bot
411 * p and sl are red after rotation (which
412 * breaks property 4). This is fixed in
413 * Case 4 (in rb_rotate_set_parents()
414 * which set sl the color of p
415 * and set p RB_BLACK)
425 tmp1
= tmp2
->rb_right
;
426 qatomic_set(&sibling
->rb_left
, tmp1
);
427 qatomic_set(&tmp2
->rb_right
, sibling
);
428 qatomic_set(&parent
->rb_right
, tmp2
);
430 rb_set_parent_color(tmp1
, sibling
, RB_BLACK
);
432 augment
->rotate(sibling
, tmp2
);
437 * Case 4 - left rotate at parent + color flips
438 * (p and sl could be either color here.
439 * After rotation, p becomes black, s acquires
440 * p's color, and sl keeps its color)
448 tmp2
= sibling
->rb_left
;
449 qatomic_set(&parent
->rb_right
, tmp2
);
450 qatomic_set(&sibling
->rb_left
, parent
);
451 rb_set_parent_color(tmp1
, sibling
, RB_BLACK
);
453 rb_set_parent(tmp2
, parent
);
455 rb_rotate_set_parents(parent
, sibling
, root
, RB_BLACK
);
456 augment
->rotate(parent
, sibling
);
459 sibling
= parent
->rb_left
;
460 if (rb_is_red(sibling
)) {
461 /* Case 1 - right rotate at parent */
462 tmp1
= sibling
->rb_right
;
463 qatomic_set(&parent
->rb_left
, tmp1
);
464 qatomic_set(&sibling
->rb_right
, parent
);
465 rb_set_parent_color(tmp1
, parent
, RB_BLACK
);
466 rb_rotate_set_parents(parent
, sibling
, root
, RB_RED
);
467 augment
->rotate(parent
, sibling
);
470 tmp1
= sibling
->rb_left
;
471 if (!tmp1
|| rb_is_black(tmp1
)) {
472 tmp2
= sibling
->rb_right
;
473 if (!tmp2
|| rb_is_black(tmp2
)) {
474 /* Case 2 - sibling color flip */
475 rb_set_parent_color(sibling
, parent
, RB_RED
);
476 if (rb_is_red(parent
)) {
477 rb_set_black(parent
);
480 parent
= rb_parent(node
);
487 /* Case 3 - left rotate at sibling */
488 tmp1
= tmp2
->rb_left
;
489 qatomic_set(&sibling
->rb_right
, tmp1
);
490 qatomic_set(&tmp2
->rb_left
, sibling
);
491 qatomic_set(&parent
->rb_left
, tmp2
);
493 rb_set_parent_color(tmp1
, sibling
, RB_BLACK
);
495 augment
->rotate(sibling
, tmp2
);
499 /* Case 4 - right rotate at parent + color flips */
500 tmp2
= sibling
->rb_right
;
501 qatomic_set(&parent
->rb_left
, tmp2
);
502 qatomic_set(&sibling
->rb_right
, parent
);
503 rb_set_parent_color(tmp1
, sibling
, RB_BLACK
);
505 rb_set_parent(tmp2
, parent
);
507 rb_rotate_set_parents(parent
, sibling
, root
, RB_BLACK
);
508 augment
->rotate(parent
, sibling
);
514 static void rb_erase_augmented(RBNode
*node
, RBRoot
*root
,
515 const RBAugmentCallbacks
*augment
)
517 RBNode
*child
= node
->rb_right
;
518 RBNode
*tmp
= node
->rb_left
;
519 RBNode
*parent
, *rebalance
;
524 * Case 1: node to erase has no more than 1 child (easy!)
526 * Note that if there is one child it must be red due to 5)
527 * and node must be black due to 4). We adjust colors locally
528 * so as to bypass rb_erase_color() later on.
530 pc
= node
->rb_parent_color
;
531 parent
= rb_parent(node
);
532 rb_change_child(node
, child
, parent
, root
);
534 child
->rb_parent_color
= pc
;
537 rebalance
= pc_is_black(pc
) ? parent
: NULL
;
541 /* Still case 1, but this time the child is node->rb_left */
542 pc
= node
->rb_parent_color
;
543 parent
= rb_parent(node
);
544 tmp
->rb_parent_color
= pc
;
545 rb_change_child(node
, tmp
, parent
, root
);
549 RBNode
*successor
= child
, *child2
;
550 tmp
= child
->rb_left
;
553 * Case 2: node's successor is its right child
562 child2
= successor
->rb_right
;
564 augment
->copy(node
, successor
);
567 * Case 3: node's successor is leftmost under
568 * node's right child subtree
585 child2
= successor
->rb_right
;
586 qatomic_set(&parent
->rb_left
, child2
);
587 qatomic_set(&successor
->rb_right
, child
);
588 rb_set_parent(child
, successor
);
590 augment
->copy(node
, successor
);
591 augment
->propagate(parent
, successor
);
595 qatomic_set(&successor
->rb_left
, tmp
);
596 rb_set_parent(tmp
, successor
);
598 pc
= node
->rb_parent_color
;
599 tmp
= rb_parent(node
);
600 rb_change_child(node
, successor
, tmp
, root
);
603 rb_set_parent_color(child2
, parent
, RB_BLACK
);
606 rebalance
= rb_is_black(successor
) ? parent
: NULL
;
608 successor
->rb_parent_color
= pc
;
612 augment
->propagate(tmp
, NULL
);
615 rb_erase_color(rebalance
, root
, augment
);
619 static void rb_erase_augmented_cached(RBNode
*node
, RBRootLeftCached
*root
,
620 const RBAugmentCallbacks
*augment
)
622 if (root
->rb_leftmost
== node
) {
623 root
->rb_leftmost
= rb_next(node
);
625 rb_erase_augmented(node
, &root
->rb_root
, augment
);
632 * Derived from lib/interval_tree.c and its dependencies,
633 * especially include/linux/interval_tree_generic.h.
636 #define rb_to_itree(N) container_of(N, IntervalTreeNode, rb)
638 static bool interval_tree_compute_max(IntervalTreeNode
*node
, bool exit
)
640 IntervalTreeNode
*child
;
641 uint64_t max
= node
->last
;
643 if (node
->rb
.rb_left
) {
644 child
= rb_to_itree(node
->rb
.rb_left
);
645 if (child
->subtree_last
> max
) {
646 max
= child
->subtree_last
;
649 if (node
->rb
.rb_right
) {
650 child
= rb_to_itree(node
->rb
.rb_right
);
651 if (child
->subtree_last
> max
) {
652 max
= child
->subtree_last
;
655 if (exit
&& node
->subtree_last
== max
) {
658 node
->subtree_last
= max
;
662 static void interval_tree_propagate(RBNode
*rb
, RBNode
*stop
)
665 IntervalTreeNode
*node
= rb_to_itree(rb
);
666 if (interval_tree_compute_max(node
, true)) {
669 rb
= rb_parent(&node
->rb
);
673 static void interval_tree_copy(RBNode
*rb_old
, RBNode
*rb_new
)
675 IntervalTreeNode
*old
= rb_to_itree(rb_old
);
676 IntervalTreeNode
*new = rb_to_itree(rb_new
);
678 new->subtree_last
= old
->subtree_last
;
681 static void interval_tree_rotate(RBNode
*rb_old
, RBNode
*rb_new
)
683 IntervalTreeNode
*old
= rb_to_itree(rb_old
);
684 IntervalTreeNode
*new = rb_to_itree(rb_new
);
686 new->subtree_last
= old
->subtree_last
;
687 interval_tree_compute_max(old
, false);
690 static const RBAugmentCallbacks interval_tree_augment
= {
691 .propagate
= interval_tree_propagate
,
692 .copy
= interval_tree_copy
,
693 .rotate
= interval_tree_rotate
,
696 /* Insert / remove interval nodes from the tree */
697 void interval_tree_insert(IntervalTreeNode
*node
, IntervalTreeRoot
*root
)
699 RBNode
**link
= &root
->rb_root
.rb_node
, *rb_parent
= NULL
;
700 uint64_t start
= node
->start
, last
= node
->last
;
701 IntervalTreeNode
*parent
;
702 bool leftmost
= true;
706 parent
= rb_to_itree(rb_parent
);
708 if (parent
->subtree_last
< last
) {
709 parent
->subtree_last
= last
;
711 if (start
< parent
->start
) {
712 link
= &parent
->rb
.rb_left
;
714 link
= &parent
->rb
.rb_right
;
719 node
->subtree_last
= last
;
720 rb_link_node(&node
->rb
, rb_parent
, link
);
721 rb_insert_augmented_cached(&node
->rb
, root
, leftmost
,
722 &interval_tree_augment
);
725 void interval_tree_remove(IntervalTreeNode
*node
, IntervalTreeRoot
*root
)
727 rb_erase_augmented_cached(&node
->rb
, root
, &interval_tree_augment
);
731 * Iterate over intervals intersecting [start;last]
733 * Note that a node's interval intersects [start;last] iff:
734 * Cond1: node->start <= last
736 * Cond2: start <= node->last
739 static IntervalTreeNode
*interval_tree_subtree_search(IntervalTreeNode
*node
,
745 * Loop invariant: start <= node->subtree_last
746 * (Cond2 is satisfied by one of the subtree nodes)
748 if (node
->rb
.rb_left
) {
749 IntervalTreeNode
*left
= rb_to_itree(node
->rb
.rb_left
);
751 if (start
<= left
->subtree_last
) {
753 * Some nodes in left subtree satisfy Cond2.
754 * Iterate to find the leftmost such node N.
755 * If it also satisfies Cond1, that's the
756 * match we are looking for. Otherwise, there
757 * is no matching interval as nodes to the
758 * right of N can't satisfy Cond1 either.
764 if (node
->start
<= last
) { /* Cond1 */
765 if (start
<= node
->last
) { /* Cond2 */
766 return node
; /* node is leftmost match */
768 if (node
->rb
.rb_right
) {
769 node
= rb_to_itree(node
->rb
.rb_right
);
770 if (start
<= node
->subtree_last
) {
775 return NULL
; /* no match */
779 IntervalTreeNode
*interval_tree_iter_first(IntervalTreeRoot
*root
,
780 uint64_t start
, uint64_t last
)
782 IntervalTreeNode
*node
, *leftmost
;
784 if (!root
->rb_root
.rb_node
) {
789 * Fastpath range intersection/overlap between A: [a0, a1] and
790 * B: [b0, b1] is given by:
792 * a0 <= b1 && b0 <= a1
794 * ... where A holds the lock range and B holds the smallest
795 * 'start' and largest 'last' in the tree. For the later, we
796 * rely on the root node, which by augmented interval tree
797 * property, holds the largest value in its last-in-subtree.
798 * This allows mitigating some of the tree walk overhead for
799 * for non-intersecting ranges, maintained and consulted in O(1).
801 node
= rb_to_itree(root
->rb_root
.rb_node
);
802 if (node
->subtree_last
< start
) {
806 leftmost
= rb_to_itree(root
->rb_leftmost
);
807 if (leftmost
->start
> last
) {
811 return interval_tree_subtree_search(node
, start
, last
);
814 IntervalTreeNode
*interval_tree_iter_next(IntervalTreeNode
*node
,
815 uint64_t start
, uint64_t last
)
817 RBNode
*rb
= node
->rb
.rb_right
, *prev
;
822 * Cond1: node->start <= last
823 * rb == node->rb.rb_right
825 * First, search right subtree if suitable
828 IntervalTreeNode
*right
= rb_to_itree(rb
);
830 if (start
<= right
->subtree_last
) {
831 return interval_tree_subtree_search(right
, start
, last
);
835 /* Move up the tree until we come from a node's left child */
837 rb
= rb_parent(&node
->rb
);
842 node
= rb_to_itree(rb
);
843 rb
= node
->rb
.rb_right
;
844 } while (prev
== rb
);
846 /* Check if the node intersects [start;last] */
847 if (last
< node
->start
) { /* !Cond1 */
850 if (start
<= node
->last
) { /* Cond2 */
856 /* Occasionally useful for calling from within the debugger. */
858 static void debug_interval_tree_int(IntervalTreeNode
*node
,
859 const char *dir
, int level
)
861 printf("%4d %*s %s [%" PRIu64
",%" PRIu64
"] subtree_last:%" PRIu64
"\n",
862 level
, level
+ 1, dir
, rb_is_red(&node
->rb
) ? "r" : "b",
863 node
->start
, node
->last
, node
->subtree_last
);
865 if (node
->rb
.rb_left
) {
866 debug_interval_tree_int(rb_to_itree(node
->rb
.rb_left
), "<", level
+ 1);
868 if (node
->rb
.rb_right
) {
869 debug_interval_tree_int(rb_to_itree(node
->rb
.rb_right
), ">", level
+ 1);
873 void debug_interval_tree(IntervalTreeNode
*node
);
874 void debug_interval_tree(IntervalTreeNode
*node
)
877 debug_interval_tree_int(node
, "*", 0);