From a0fa3202772aee10254e380b899a09f42f91c1aa Mon Sep 17 00:00:00 2001
From: Petr Baudis <pasky@ucw.cz>
Date: Sat, 11 Dec 2010 20:44:23 +0100
Subject: [PATCH] UCT TreePool: Introduce; replays best significant children
 during playout

---
 uct/internal.h |  10 ++++++
 uct/uct.c      |  46 ++++++++++++++++++++++++-
 uct/walk.c     | 104 ++++++++++++++++++++++++++++++++++++++++++++++++++++++---
 3 files changed, 154 insertions(+), 6 deletions(-)

diff --git a/uct/internal.h b/uct/internal.h
index be23bcb..0991827 100644
--- a/uct/internal.h
+++ b/uct/internal.h
@@ -67,6 +67,16 @@ struct uct {
 	int val_points;
 	bool val_extra;
 
+	int treepool_chance[2];
+	int treepool_size;
+	enum treepool_type {
+		UTT_RAVE_PLAYOUTS,
+		UTT_RAVE_VALUE,
+		UTT_UCT_PLAYOUTS,
+		UTT_UCT_VALUE,
+		UTT_EVALUATE,
+	} treepool_type;
+
 	int random_policy_chance;
 	int local_tree;
 	int tenuki_d;
diff --git a/uct/uct.c b/uct/uct.c
index a0105ed..d29964d 100644
--- a/uct/uct.c
+++ b/uct/uct.c
@@ -898,13 +898,57 @@ uct_state_init(char *arg, struct board *b)
 
 			/** Other heuristics */
 			} else if (!strcasecmp(optname, "significant_threshold") && optval) {
-				/* Some heuristics (XXX: so far, none) rely
+				/* Some heuristics (treepool) rely
 				 * on the knowledge of the last "significant"
 				 * node in the descent. Such a node is
 				 * considered reasonably trustworthy to carry
 				 * some meaningful information in the values
 				 * of the node and its children. */
 				u->significant_threshold = atoi(optval);
+			} else if (!strcasecmp(optname, "treepool_chance") && optval) {
+				/* Chance of applying the treepool heuristic:
+				 * one of the best N children of the last
+				 * significant node is tried on each turn
+				 * of the simulation. */
+				/* This is in form of two numbers:
+				 * PREMOVE:POSTMOVE. Each is percentage
+				 * value, one is the chance of the move
+				 * tried before playout policy, one is the
+				 * chance of it being applied if the policy
+				 * has not picked anymove. */
+				char *optval2 = strchr(optval, ':');
+				if (!optval2) {
+					fprintf(stderr, "uct: treepool_chance takes two comma-separated numbers\n");
+					exit(1);
+				}
+				u->treepool_chance[0] = atoi(optval);
+				optval = ++optval2;
+				u->treepool_chance[1] = atoi(optval);
+			} else if (!strcasecmp(optname, "treepool_size") && optval) {
+				/* Number of top significant children
+				 * to pick from. Too low means low effect,
+				 * too high means even lousy moves are
+				 * played. */
+				u->treepool_size = atoi(optval);
+			} else if (!strcasecmp(optname, "treepool_type") && optval) {
+				/* How to sort the children. */
+				if (!strcasecmp(optval, "rave_playouts"))
+					u->treepool_type = UTT_RAVE_PLAYOUTS;
+				else if (!strcasecmp(optval, "rave_value"))
+					u->treepool_type = UTT_RAVE_VALUE;
+				else if (!strcasecmp(optval, "uct_playouts"))
+					u->treepool_type = UTT_UCT_PLAYOUTS;
+				else if (!strcasecmp(optval, "uct_value"))
+					u->treepool_type = UTT_UCT_VALUE;
+				else if (!strcasecmp(optval, "evaluate"))
+					/* The proper combination of RAVE,
+					 * UCT and prior, as used through
+					 * descent. */
+					u->treepool_type = UTT_EVALUATE;
+				else {
+					fprintf(stderr, "uct: unknown treepool_type %s\n", optval);
+					exit(1);
+				}
 
 			/** Distributed engine slaves setup */
 
diff --git a/uct/walk.c b/uct/walk.c
index 2432175..83f95f0 100644
--- a/uct/walk.c
+++ b/uct/walk.c
@@ -106,6 +106,9 @@ struct uct_playout_callback {
 	struct uct *uct;
 	struct tree *tree;
 	struct tree_node *lnode;
+
+	coord_t *treepool[2];
+	int treepool_n[2];
 };
 
 static void
@@ -176,7 +179,12 @@ uct_playout_hook(struct playout_policy *playout, struct playout_setup *setup, st
 {
 	struct uct_playout_callback *upc = setup->hook_data;
 
-	/* TODO: Fill me. */
+	if (upc->uct->treepool_chance[mode] > fast_random(100) && upc->treepool[color - 1]) {
+		assert(upc->treepool_n[color - 1] > 0);
+		coord_t treepool_move = upc->treepool[color - 1][fast_random(upc->treepool_n[color - 1])];
+		if (board_is_valid_play(b, treepool_move, color))
+			return treepool_move;
+	}
 	return pass;
 }
 
@@ -192,10 +200,71 @@ uct_playout_postpolicy(struct playout_policy *playout, struct playout_setup *set
 	return uct_playout_hook(playout, setup, b, color, 1);
 }
 
+double
+treepool_node_value(struct uct *u, struct tree *tree, int parity, struct tree_node *node)
+{
+	/* XXX: Playouts get cast to double */
+	switch (u->treepool_type) {
+		case UTT_RAVE_PLAYOUTS:
+			return node->amaf.playouts;
+		case UTT_RAVE_VALUE:
+			return tree_node_get_value(tree, parity, node->amaf.value);
+		case UTT_UCT_PLAYOUTS:
+			return node->u.playouts;
+		case UTT_UCT_VALUE:
+			return tree_node_get_value(tree, parity, node->u.value);
+		case UTT_EVALUATE:
+		{
+			struct uct_descent d = { .node = node };
+			assert(u->policy->evaluate);
+			return u->policy->evaluate(u->policy, tree, &d, parity);
+		}
+		default: assert(0);
+	}
+	return -1;
+}
+
+static void
+treepool_setup(struct uct_playout_callback *upc, struct tree_node *node, int color)
+{
+	struct uct *u = upc->uct;
+	int parity = ((node->depth ^ upc->tree->root->depth) & 1) ? -1 : 1;
+
+	/* XXX: Naive O(N^2) way. */
+	for (int i = 0; i < u->treepool_size; i++) {
+		/* For each item, find the highest
+		 * node not in the pool yet. */
+		struct tree_node *best = NULL;
+		double best_val = -1;
+
+		// first comes pass which we skip
+		for (struct tree_node *ni = node->children->sibling; ni; ni = ni->sibling) {
+			/* Do we already have it? */
+			bool have = false;
+			for (int j = 0; j < upc->treepool_n[color]; j++)
+				if (upc->treepool[color][j] == ni->coord) {
+					have = true;
+					break;
+				}
+			if (have)
+				continue;
+
+			double i_val = treepool_node_value(u, upc->tree, parity, ni);
+			if (i_val > best_val) {
+				best = node;
+				best_val = i_val;
+			}
+		}
+
+		if (!best) break;
+		upc->treepool[color][upc->treepool_n[color]++] = best->coord;
+	}
+}
+
 
 static int
 uct_leaf_node(struct uct *u, struct board *b, enum stone player_color,
-              struct playout_amafmap *amaf,
+              struct playout_amafmap *amaf, struct uct_descent *descent,
               struct tree *t, struct tree_node *n, enum stone node_color,
 	      char *spaces)
 {
@@ -218,14 +287,39 @@ uct_leaf_node(struct uct *u, struct board *b, enum stone player_color,
 			spaces, n->u.playouts, coord2sstr(n->coord, t->board),
 			tree_node_get_value(t, parity, n->u.value));
 
-	/* TODO: Don't necessarily restart the sequence walk when entering
-	 * playout. */
-	struct uct_playout_callback upc = { .uct = u, .tree = t, .lnode = NULL };
+	struct uct_playout_callback upc = {
+		.uct = u,
+		.tree = t,
+		/* TODO: Don't necessarily restart the sequence walk when
+		 * entering playout. */
+		.lnode = NULL,
+	};
+
 	if (u->local_tree_playout) {
 		/* N.B.: We know this is ELO playout. */
 		playout_elo_callback(u->playout, uct_playout_probdist, &upc);
 	}
 
+	coord_t pool[2][u->treepool_size];
+	if (u->treepool_chance[0] + u->treepool_chance[1] > 0) {
+		for (int color = 0; color < 2; color++) {
+			/* Prepare tree-based pool of moves to try forcing
+			 * during the playout. */
+			/* We consider the children of the last significant
+			 * node, picking top N choices. */
+			struct tree_node *n = descent->significant[color];
+			if (!n || !n->children || !n->children->sibling) {
+				/* No significant node, or it's childless or has
+				 * only pass as its child. */
+				upc.treepool[color] = NULL;
+				upc.treepool_n[color] = 0;
+			} else {
+				upc.treepool[color] = (coord_t *) &pool[color];
+				treepool_setup(&upc, n, color);
+			}
+		}
+	}
+
 	struct playout_setup ps = {
 		.gamelen = u->gamelen,
 		.mercymin = u->mercymin,
-- 
2.11.4.GIT