More notes on the possible min/max method.
[pachi/pachi-r6144.git] / uct / tree.h
blob850078522c5449a9c141834c98506cdbf3202b84
1 #ifndef PACHI_UCT_TREE_H
2 #define PACHI_UCT_TREE_H
4 /* Management of UCT trees. See diagram below for the node structure.
6 * Two allocation methods are supported for the tree nodes:
8 * - calloc/free: each node is allocated with one calloc.
9 * After a move, all nodes except the subtree rooted at
10 * the played move are freed one by one with free().
11 * Since this can be very slow (seen 9s and loss on time because
12 * of this) the nodes are freed in a background thread.
13 * We still reserve enough memory for the next move in case
14 * the background thread doesn't free nodes fast enough.
16 * - fast_alloc: a large buffer is allocated once, and each
17 * node allocation takes some of this buffer. After a move
18 * is played, no memory if freed if the buffer still has
19 * enough free space. Otherwise the subtree rooted at the
20 * played move is copied to a temporary buffer, pruning it
21 * if necessary to fit in this small buffer. We copy by
22 * preference nodes with largest number of playouts.
23 * Then the temporary buffer is copied back to the original
24 * buffer, which has now plenty of space.
25 * Once the fast_alloc mode is proven reliable, the
26 * calloc/free method will be removed. */
28 #include <stdbool.h>
29 #include <pthread.h>
30 #include "move.h"
31 #include "stats.h"
32 #include "probdist.h"
34 struct board;
35 struct uct;
38 * +------+
39 * | node |
40 * +------+
41 * / <- parent
42 * +------+ v- sibling +------+
43 * | node | ------------ | node |
44 * +------+ +------+
45 * | <- children |
46 * +------+ +------+ +------+ +------+
47 * | node | - | node | | node | - | node |
48 * +------+ +------+ +------+ +------+
51 /* TODO: Performance would benefit from a reorganization:
52 * (i) Allocate all children of a node within a single block.
53 * (ii) Keep all u stats together, and all amaf stats together.
54 * Currently, rave_update is top source of cache misses, and
55 * there is large memory overhead for having all nodes separate. */
57 struct tree_node {
58 hash_t hash;
59 struct tree_node *parent, *sibling, *children;
61 /*** From here on, struct is saved/loaded from opening tbook */
63 unsigned short depth; // just for statistics
65 /* Common Fate Graph distance from parent, but at most TREE_NODE_D_MAX+1 */
66 #define TREE_NODE_D_MAX 3
67 unsigned char d;
69 #define TREE_HINT_INVALID 1 // don't go to this node, invalid move
70 unsigned char hints;
72 /* coord is usually coord_t, but this is very space-sensitive. */
73 short coord;
75 /* In case multiple threads walk the tree, is_expanded is set
76 * atomically. Only the first thread setting it expands the node.
77 * The node goes through 3 states:
78 * 1) children == null, is_expanded == false: leaf node
79 * 2) children == null, is_expanded == true: one thread currently expanding
80 * 2) children != null, is_expanded == true: fully expanded node */
81 bool is_expanded;
83 struct move_stats u;
84 struct move_stats prior;
85 /* XXX: Should be way for policies to add their own stats */
86 struct move_stats amaf;
87 /* Stats before starting playout; used for distributed engine. */
88 struct move_stats pu;
89 /* Criticality information; information about final board owner
90 * of the tree coordinate corresponding to the node */
91 struct move_stats winner_owner; // owner == winner
92 struct move_stats black_owner; // owner == black
93 floating_t last_urgency; // the urgency of this node when last considered in ucb1rave_descend, also in black's perspective; just for debugging
96 struct tree_hash;
98 struct tree {
99 struct board *board;
100 struct tree_node *root;
101 struct board_symmetry root_symmetry;
102 enum stone root_color;
104 /* Whether to use any extra komi during score counting. This is
105 * tree-specific variable since this can arbitrarily change between
106 * moves. */
107 bool use_extra_komi;
108 /* The value of applied extra komi. For DYNKOMI_LINEAR, this value
109 * is only informative, the actual value is computed per simulation
110 * based on leaf node depth. */
111 floating_t extra_komi;
113 /* We merge local (non-tenuki) sequences for both colors, occuring
114 * anywhere in the tree; nodes are created on-demand, special 'pass'
115 * nodes represent tenuki. Only u move_stats are used, prior and amaf
116 * is ignored. Values in root node are ignored. */
117 /* The values in the tree can be either "raw" or "tempered"
118 * (representing difference against parent node in the main tree),
119 * controlled by local_tree setting. */
120 struct tree_node *ltree_black;
121 // Of course even in white tree, winrates are from b's perspective
122 // as anywhere else. ltree_white has white-first sequences as children.
123 struct tree_node *ltree_white;
124 // Aging factor; 2 means halve all playout values after each turn.
125 // 1 means don't age at all.
126 floating_t ltree_aging;
128 /* Hash table used when working as slave for the distributed engine.
129 * Maps coordinate path to tree node. */
130 struct tree_hash *htable;
131 int hbits;
133 // Statistics
134 int max_depth;
135 volatile unsigned long nodes_size; // byte size of all allocated nodes
136 unsigned long max_tree_size; // maximum byte size for entire tree, > 0 only for fast_alloc
137 unsigned long max_pruned_size;
138 unsigned long pruning_threshold;
139 void *nodes; // nodes buffer, only for fast_alloc
142 /* Warning: all functions below except tree_expand_node & tree_leaf_node are THREAD-UNSAFE! */
143 struct tree *tree_init(struct board *board, enum stone color, unsigned long max_tree_size,
144 unsigned long max_pruned_size, unsigned long pruning_threshold, floating_t ltree_aging, int hbits);
145 void tree_done(struct tree *tree);
146 void tree_dump(struct tree *tree, int thres, bool unlimited);
147 void tree_save(struct tree *tree, struct board *b, int thres);
148 void tree_load(struct tree *tree, struct board *b);
150 struct tree_node *tree_get_node(struct tree *tree, struct tree_node *node, coord_t c, bool create);
151 struct tree_node *tree_garbage_collect(struct tree *tree, struct tree_node *node);
152 void tree_promote_node(struct tree *tree, struct tree_node **node);
153 bool tree_promote_at(struct tree *tree, struct board *b, coord_t c);
155 void tree_expand_node(struct tree *tree, struct tree_node *node, struct board *b, enum stone color, struct uct *u, int parity);
156 struct tree_node *tree_lnode_for_node(struct tree *tree, struct tree_node *ni, struct tree_node *lni, int tenuki_d);
158 static bool tree_leaf_node(struct tree_node *node);
160 #define tree_node_parity(tree, node) \
161 ((((node)->depth ^ (tree)->root->depth) & 1) ? -1 : 1)
163 /* Get black parity from parity within the tree. */
164 #define tree_parity(tree, parity) \
165 (tree->root_color == S_WHITE ? (parity) : -1 * (parity))
167 /* Get a 0..1 value to maximize; @parity is parity within the tree. */
168 #define tree_node_get_value(tree, parity, value) \
169 (tree_parity(tree, parity) > 0 ? value : 1 - value)
171 static inline bool
172 tree_leaf_node(struct tree_node *node)
174 return !(node->children);
177 static inline floating_t
178 tree_node_criticality(struct tree *t, struct tree_node *node)
180 /* cov(player_gets, player_wins) =
181 * [The argument: If 'gets' and 'wins' is uncorrelated, b_gets * b_wins
182 * is valid way to obtain winner_gets. The more correlated it is, the
183 * more distorted the result.]
184 * = winner_gets - (b_gets * b_wins + w_gets * w_wins)
185 * = winner_gets - (b_gets * b_wins + (1 - b_gets) * (1 - b_wins))
186 * = winner_gets - (b_gets * b_wins + 1 - b_gets - b_wins + b_gets * b_wins)
187 * = winner_gets - (2 * b_gets * b_wins - b_gets - b_wins + 1) */
188 return node->winner_owner.value
189 - (2 * node->black_owner.value * node->u.value
190 - node->black_owner.value - node->u.value + 1);
193 #endif