UCT: Move prior computation out of tree policy
[pachi.git] / uct / tree.c
blob9999d063d8604a72ecc8443b04f7f313a891a03c
1 #include <assert.h>
2 #include <math.h>
3 #include <stddef.h>
4 #include <stdint.h>
5 #include <stdio.h>
6 #include <stdlib.h>
7 #include <string.h>
9 #include "board.h"
10 #include "debug.h"
11 #include "engine.h"
12 #include "move.h"
13 #include "playout.h"
14 #include "uct/internal.h"
15 #include "uct/prior.h"
16 #include "uct/tree.h"
19 static struct tree_node *
20 tree_init_node(struct tree *t, coord_t coord, int depth)
22 struct tree_node *n = calloc(1, sizeof(*n));
23 n->coord = coord;
24 n->depth = depth;
25 static long c = 1000000;
26 n->hash = c++;
27 if (depth > t->max_depth)
28 t->max_depth = depth;
29 return n;
32 struct tree *
33 tree_init(struct board *board, enum stone color)
35 struct tree *t = calloc(1, sizeof(*t));
36 t->board = board;
37 /* The root PASS move is only virtual, we never play it. */
38 t->root = tree_init_node(t, pass, 0);
39 t->root_symmetry = board->symmetry;
40 t->root_color = stone_other(color); // to research black moves, root will be white
41 return t;
45 static void
46 tree_done_node(struct tree *t, struct tree_node *n)
48 struct tree_node *ni = n->children;
49 while (ni) {
50 struct tree_node *nj = ni->sibling;
51 tree_done_node(t, ni);
52 ni = nj;
54 free(n);
57 void
58 tree_done(struct tree *t)
60 tree_done_node(t, t->root);
61 free(t);
65 static void
66 tree_node_dump(struct tree *tree, struct tree_node *node, int l, int thres)
68 for (int i = 0; i < l; i++) fputc(' ', stderr);
69 int children = 0;
70 for (struct tree_node *ni = node->children; ni; ni = ni->sibling)
71 children++;
72 /* We use 1 as parity, since for all nodes we want to know the
73 * win probability of _us_, not the node color. */
74 fprintf(stderr, "[%s] %f (%d/%d playouts [prior %d/%d amaf %d/%d]; hints %x; %d children) <%lld>\n",
75 coord2sstr(node->coord, tree->board),
76 tree_node_get_value(tree, node, u, 1),
77 tree_node_get_wins(tree, node, u, 1), node->u.playouts,
78 tree_node_get_wins(tree, node, prior, 1), node->prior.playouts,
79 tree_node_get_wins(tree, node, amaf, 1), node->amaf.playouts,
80 node->hints, children, node->hash);
82 /* Print nodes sorted by #playouts. */
84 struct tree_node *nbox[1000]; int nboxl = 0;
85 for (struct tree_node *ni = node->children; ni; ni = ni->sibling)
86 if (ni->u.playouts > thres)
87 nbox[nboxl++] = ni;
89 while (true) {
90 int best = -1;
91 for (int i = 0; i < nboxl; i++)
92 if (nbox[i] && (best < 0 || nbox[i]->u.playouts > nbox[best]->u.playouts))
93 best = i;
94 if (best < 0)
95 break;
96 tree_node_dump(tree, nbox[best], l + 1, /* node->u.value < 0.1 ? 0 : */ thres);
97 nbox[best] = NULL;
101 void
102 tree_dump(struct tree *tree, int thres)
104 if (thres && tree->root->u.playouts / thres > 100) {
105 /* Be a bit sensible about this; the opening book can create
106 * huge dumps at first. */
107 thres = tree->root->u.playouts / 100 * (thres < 1000 ? 1 : thres / 1000);
109 tree_node_dump(tree, tree->root, 0, thres);
113 static char *
114 tree_book_name(struct board *b)
116 static char buf[256];
117 if (b->handicap > 0) {
118 sprintf(buf, "uctbook-%d-%02.01f-h%d.pachitree", b->size - 2, b->komi, b->handicap);
119 } else {
120 sprintf(buf, "uctbook-%d-%02.01f.pachitree", b->size - 2, b->komi);
122 return buf;
125 static void
126 tree_node_save(FILE *f, struct tree_node *node, int thres)
128 fputc(1, f);
129 fwrite(((void *) node) + offsetof(struct tree_node, depth),
130 sizeof(struct tree_node) - offsetof(struct tree_node, depth),
131 1, f);
133 if (node->u.playouts >= thres)
134 for (struct tree_node *ni = node->children; ni; ni = ni->sibling)
135 tree_node_save(f, ni, thres);
137 fputc(0, f);
140 void
141 tree_save(struct tree *tree, struct board *b, int thres)
143 char *filename = tree_book_name(b);
144 FILE *f = fopen(filename, "wb");
145 if (!f) {
146 perror("fopen");
147 return;
149 tree_node_save(f, tree->root, thres);
150 fputc(0, f);
151 fclose(f);
155 void
156 tree_node_load(FILE *f, struct tree_node *node, int *num)
158 (*num)++;
160 fread(((void *) node) + offsetof(struct tree_node, depth),
161 sizeof(struct tree_node) - offsetof(struct tree_node, depth),
162 1, f);
164 /* Keep values in sane scale, otherwise we start overflowing.
165 * We may go slow here but we must be careful about not getting
166 * too huge integers.*/
167 #define MAX_PLAYOUTS 10000000
168 if (node->u.playouts > MAX_PLAYOUTS) {
169 int over = node->u.playouts - MAX_PLAYOUTS;
170 node->u.wins -= ((double) node->u.wins / node->u.playouts) * over;
171 node->u.playouts = MAX_PLAYOUTS;
173 if (node->amaf.playouts > MAX_PLAYOUTS) {
174 int over = node->amaf.playouts - MAX_PLAYOUTS;
175 node->amaf.wins -= ((double) node->amaf.wins / node->amaf.playouts) * over;
176 node->amaf.playouts = MAX_PLAYOUTS;
179 struct tree_node *ni = NULL, *ni_prev = NULL;
180 while (fgetc(f)) {
181 ni_prev = ni; ni = calloc(1, sizeof(*ni));
182 if (!node->children)
183 node->children = ni;
184 else
185 ni_prev->sibling = ni;
186 ni->parent = node;
187 tree_node_load(f, ni, num);
191 void
192 tree_load(struct tree *tree, struct board *b)
194 char *filename = tree_book_name(b);
195 FILE *f = fopen(filename, "rb");
196 if (!f)
197 return;
199 fprintf(stderr, "Loading opening book %s...\n", filename);
201 int num = 0;
202 if (fgetc(f))
203 tree_node_load(f, tree->root, &num);
204 fprintf(stderr, "Loaded %d nodes.\n", num);
206 fclose(f);
210 static struct tree_node *
211 tree_node_copy(struct tree_node *node)
213 struct tree_node *n2 = malloc(sizeof(*n2));
214 *n2 = *node;
215 if (!node->children)
216 return n2;
217 struct tree_node *ni = node->children;
218 struct tree_node *ni2 = tree_node_copy(ni);
219 n2->children = ni2; ni2->parent = n2;
220 while ((ni = ni->sibling)) {
221 ni2->sibling = tree_node_copy(ni);
222 ni2 = ni2->sibling; ni2->parent = n2;
224 return n2;
227 struct tree *
228 tree_copy(struct tree *tree)
230 struct tree *t2 = malloc(sizeof(*t2));
231 *t2 = *tree;
232 t2->root = tree_node_copy(tree->root);
233 return t2;
237 static void
238 tree_node_merge(struct tree_node *dest, struct tree_node *src)
240 dest->hints |= src->hints;
242 /* Merge the children, both are coord-sorted lists. */
243 struct tree_node *di = dest->children, **dref = &dest->children;
244 struct tree_node *si = src->children, **sref = &src->children;
245 while (di && si) {
246 if (di->coord != si->coord) {
247 /* src has some extra items or misses di */
248 struct tree_node *si2 = si->sibling;
249 while (si2 && di->coord != si2->coord) {
250 si2 = si2->sibling;
252 if (!si2)
253 goto next_di; /* src misses di, move on */
254 /* chain the extra [si,si2) items before di */
255 (*dref) = si;
256 while (si->sibling != si2) {
257 si->parent = dest;
258 si = si->sibling;
260 si->parent = dest;
261 si->sibling = di;
262 si = si2;
263 (*sref) = si;
265 /* Matching nodes - recurse... */
266 tree_node_merge(di, si);
267 /* ...and move on. */
268 sref = &si->sibling; si = si->sibling;
269 next_di:
270 dref = &di->sibling; di = di->sibling;
272 if (si) {
273 /* Some outstanding nodes are left on src side, rechain
274 * them to dst. */
275 (*dref) = si;
276 while (si) {
277 si->parent = dest;
278 si = si->sibling;
280 (*sref) = NULL;
283 /* Priors should be constant. */
284 assert(dest->prior.playouts == src->prior.playouts && dest->prior.wins == src->prior.wins);
286 dest->amaf.playouts += src->amaf.playouts;
287 dest->amaf.wins += src->amaf.wins;
288 if (dest->amaf.playouts)
289 dest->amaf.value = (float) dest->amaf.wins / dest->amaf.playouts;
291 dest->u.playouts += src->u.playouts;
292 dest->u.wins += src->u.wins;
293 if (dest->prior.playouts + dest->amaf.playouts + dest->u.playouts)
294 tree_update_node_value(dest);
297 /* Merge two trees built upon the same board. Note that the operation is
298 * destructive on src. */
299 void
300 tree_merge(struct tree *dest, struct tree *src)
302 if (src->max_depth > dest->max_depth)
303 dest->max_depth = src->max_depth;
304 tree_node_merge(dest->root, src->root);
308 static void
309 tree_node_normalize(struct tree_node *node, int factor)
311 for (struct tree_node *ni = node->children; ni; ni = ni->sibling)
312 tree_node_normalize(ni, factor);
314 #define normalize(s1, s2, t) node->s2.t = node->s1.t + (node->s2.t - node->s1.t) / factor;
315 normalize(pamaf, amaf, playouts);
316 normalize(pamaf, amaf, wins);
317 normalize(pamaf, amaf, value);
318 memcpy(&node->pamaf, &node->amaf, sizeof(node->amaf));
320 normalize(pu, u, playouts);
321 normalize(pu, u, wins);
322 normalize(pu, u, value);
323 memcpy(&node->pu, &node->u, sizeof(node->u));
324 #undef normalize
327 /* Normalize a tree, dividing the amaf and u values by given
328 * factor; otherwise, simulations run in independent threads
329 * two trees built upon the same board. To correctly handle
330 * results taken from previous simulation run, they are backed
331 * up in tree. */
332 void
333 tree_normalize(struct tree *tree, int factor)
335 tree_node_normalize(tree->root, factor);
339 /* Tree symmetry: When possible, we will localize the tree to a single part
340 * of the board in tree_expand_node() and possibly flip along symmetry axes
341 * to another part of the board in tree_promote_at(). We follow b->symmetry
342 * guidelines here. */
345 void
346 tree_expand_node(struct tree *t, struct tree_node *node, struct board *b, enum stone color, int radar, struct uct *u, int parity)
348 struct tree_node *ni = tree_init_node(t, pass, node->depth + 1);
349 ni->parent = node; node->children = ni;
350 uct_prior(u, t, ni, b, color, parity);
352 /* The loop considers only the symmetry playground. */
353 if (UDEBUGL(6)) {
354 fprintf(stderr, "expanding %s within [%d,%d],[%d,%d] %d-%d\n",
355 coord2sstr(node->coord, b),
356 b->symmetry.x1, b->symmetry.y1,
357 b->symmetry.x2, b->symmetry.y2,
358 b->symmetry.type, b->symmetry.d);
360 for (int i = b->symmetry.x1; i <= b->symmetry.x2; i++) {
361 for (int j = b->symmetry.y1; j <= b->symmetry.y2; j++) {
362 if (b->symmetry.d) {
363 int x = b->symmetry.type == SYM_DIAG_DOWN ? board_size(b) - 1 - i : i;
364 if (x > j) {
365 if (UDEBUGL(7))
366 fprintf(stderr, "drop %d,%d\n", i, j);
367 continue;
371 coord_t c = coord_xy_otf(i, j, t->board);
372 if (board_at(b, c) != S_NONE)
373 continue;
374 assert(c != node->coord); // I have spotted "C3 C3" in some sequence...
375 /* This looks very useful on large boards - weeds out huge amount of crufty moves. */
376 if (b->hash /* not empty board */ && radar && !board_stone_radar(b, c, radar))
377 continue;
379 struct tree_node *nj = tree_init_node(t, c, node->depth + 1);
380 nj->parent = node; ni->sibling = nj; ni = nj;
382 uct_prior(u, t, ni, b, color, parity);
388 static coord_t
389 flip_coord(struct board *b, coord_t c,
390 bool flip_horiz, bool flip_vert, int flip_diag)
392 int x = coord_x(c, b), y = coord_y(c, b);
393 if (flip_diag) {
394 int z = x; x = y; y = z;
396 if (flip_horiz) {
397 x = board_size(b) - 1 - x;
399 if (flip_vert) {
400 y = board_size(b) - 1 - y;
402 return coord_xy_otf(x, y, b);
405 static void
406 tree_fix_node_symmetry(struct board *b, struct tree_node *node,
407 bool flip_horiz, bool flip_vert, int flip_diag)
409 if (!is_pass(node->coord))
410 node->coord = flip_coord(b, node->coord, flip_horiz, flip_vert, flip_diag);
412 for (struct tree_node *ni = node->children; ni; ni = ni->sibling)
413 tree_fix_node_symmetry(b, ni, flip_horiz, flip_vert, flip_diag);
416 static void
417 tree_fix_symmetry(struct tree *tree, struct board *b, coord_t c)
419 if (is_pass(c))
420 return;
422 struct board_symmetry *s = &tree->root_symmetry;
423 int cx = coord_x(c, b), cy = coord_y(c, b);
425 /* playground X->h->v->d normalization
426 * :::.. .d...
427 * .::.. v....
428 * ..:.. .....
429 * ..... h...X
430 * ..... ..... */
431 bool flip_horiz = cx < s->x1 || cx > s->x2;
432 bool flip_vert = cy < s->y1 || cy > s->y2;
434 bool flip_diag = 0;
435 if (s->d) {
436 bool dir = (s->type == SYM_DIAG_DOWN);
437 int x = dir ^ flip_horiz ^ flip_vert ? board_size(b) - 1 - cx : cx;
438 if (flip_vert ? x < cy : x > cy) {
439 flip_diag = 1;
443 if (UDEBUGL(4)) {
444 fprintf(stderr, "%s will flip %d %d %d -> %s, sym %d (%d) -> %d (%d)\n",
445 coord2sstr(c, b), flip_horiz, flip_vert, flip_diag,
446 coord2sstr(flip_coord(b, c, flip_horiz, flip_vert, flip_diag), b),
447 s->type, s->d, b->symmetry.type, b->symmetry.d);
449 if (flip_horiz || flip_vert || flip_diag)
450 tree_fix_node_symmetry(b, tree->root, flip_horiz, flip_vert, flip_diag);
454 static void
455 tree_unlink_node(struct tree_node *node)
457 struct tree_node *ni = node->parent;
458 if (ni->children == node) {
459 ni->children = node->sibling;
460 } else {
461 ni = ni->children;
462 while (ni->sibling != node)
463 ni = ni->sibling;
464 ni->sibling = node->sibling;
466 node->sibling = NULL;
467 node->parent = NULL;
/* Remove @node from @tree: detach it from its parent, then free it
 * together with its whole subtree. */
void
tree_delete_node(struct tree *tree, struct tree_node *node)
{
	tree_unlink_node(node);		/* take it out of the parent's list */
	tree_done_node(tree, node);	/* and release the subtree */
}
477 void
478 tree_promote_node(struct tree *tree, struct tree_node *node)
480 assert(node->parent == tree->root);
481 tree_unlink_node(node);
482 tree_done_node(tree, tree->root);
483 tree->root = node;
484 tree->root_color = stone_other(tree->root_color);
485 board_symmetry_update(tree->board, &tree->root_symmetry, node->coord);
488 bool
489 tree_promote_at(struct tree *tree, struct board *b, coord_t c)
491 tree_fix_symmetry(tree, b, c);
493 for (struct tree_node *ni = tree->root->children; ni; ni = ni->sibling) {
494 if (ni->coord == c) {
495 tree_promote_node(tree, ni);
496 return true;
499 return false;
502 bool
503 tree_leaf_node(struct tree_node *node)
505 return !(node->children);
508 void
509 tree_update_node_value(struct tree_node *node)
511 bool noamaf = node->hints & NODE_HINT_NOAMAF;
512 node->u.value = (float)(node->u.wins + node->prior.wins + (!noamaf ? node->amaf.wins : 0))
513 / (node->u.playouts + node->prior.playouts + (!noamaf ? node->amaf.playouts : 0));
514 #if 0
515 { struct board b2; board_size(&b2) = 9+2;
516 fprintf(stderr, "%s->%s %d/%d %d/%d %f\n", node->parent ? coord2sstr(node->parent->coord, &b2) : NULL, coord2sstr(node->coord, &b2), node->u.wins, node->u.playouts, node->prior.wins, node->prior.playouts, node->u.value); }
517 #endif