From a0fa3202772aee10254e380b899a09f42f91c1aa Mon Sep 17 00:00:00 2001 From: Petr Baudis Date: Sat, 11 Dec 2010 20:44:23 +0100 Subject: [PATCH] UCT TreePool: Introduce; replays best significant children during playout --- uct/internal.h | 10 ++++++ uct/uct.c | 46 ++++++++++++++++++++++++- uct/walk.c | 104 ++++++++++++++++++++++++++++++++++++++++++++++++++++++--- 3 files changed, 154 insertions(+), 6 deletions(-) diff --git a/uct/internal.h b/uct/internal.h index be23bcb..0991827 100644 --- a/uct/internal.h +++ b/uct/internal.h @@ -67,6 +67,16 @@ struct uct { int val_points; bool val_extra; + int treepool_chance[2]; + int treepool_size; + enum treepool_type { + UTT_RAVE_PLAYOUTS, + UTT_RAVE_VALUE, + UTT_UCT_PLAYOUTS, + UTT_UCT_VALUE, + UTT_EVALUATE, + } treepool_type; + int random_policy_chance; int local_tree; int tenuki_d; diff --git a/uct/uct.c b/uct/uct.c index a0105ed..d29964d 100644 --- a/uct/uct.c +++ b/uct/uct.c @@ -898,13 +898,57 @@ uct_state_init(char *arg, struct board *b) /** Other heuristics */ } else if (!strcasecmp(optname, "significant_threshold") && optval) { - /* Some heuristics (XXX: so far, none) rely + /* Some heuristics (treepool) rely * on the knowledge of the last "significant" * node in the descent. Such a node is * considered reasonably trustworthy to carry * some meaningful information in the values * of the node and its children. */ u->significant_threshold = atoi(optval); + } else if (!strcasecmp(optname, "treepool_chance") && optval) { + /* Chance of applying the treepool heuristic: + * one of the best N children of the last + * significant node is tried on each turn + * of the simulation. */ + /* This is in form of two numbers: + * PREMOVE:POSTMOVE. Each is percentage + * value, one is the chance of the move + * tried before playout policy, one is the + * chance of it being applied if the policy + * has not picked anymove. */ + char *optval2 = strchr(optval, ':'); + if (!optval2) { + fprintf(stderr, "uct: treepool_chance takes two comma-separated numbers\n"); + exit(1); + } + u->treepool_chance[0] = atoi(optval); + optval = ++optval2; + u->treepool_chance[1] = atoi(optval); + } else if (!strcasecmp(optname, "treepool_size") && optval) { + /* Number of top significant children + * to pick from. Too low means low effect, + * too high means even lousy moves are + * played. */ + u->treepool_size = atoi(optval); + } else if (!strcasecmp(optname, "treepool_type") && optval) { + /* How to sort the children. */ + if (!strcasecmp(optval, "rave_playouts")) + u->treepool_type = UTT_RAVE_PLAYOUTS; + else if (!strcasecmp(optval, "rave_value")) + u->treepool_type = UTT_RAVE_VALUE; + else if (!strcasecmp(optval, "uct_playouts")) + u->treepool_type = UTT_UCT_PLAYOUTS; + else if (!strcasecmp(optval, "uct_value")) + u->treepool_type = UTT_UCT_VALUE; + else if (!strcasecmp(optval, "evaluate")) + /* The proper combination of RAVE, + * UCT and prior, as used through + * descent. */ + u->treepool_type = UTT_EVALUATE; + else { + fprintf(stderr, "uct: unknown treepool_type %s\n", optval); + exit(1); + } /** Distributed engine slaves setup */ diff --git a/uct/walk.c b/uct/walk.c index 2432175..83f95f0 100644 --- a/uct/walk.c +++ b/uct/walk.c @@ -106,6 +106,9 @@ struct uct_playout_callback { struct uct *uct; struct tree *tree; struct tree_node *lnode; + + coord_t *treepool[2]; + int treepool_n[2]; }; static void @@ -176,7 +179,12 @@ uct_playout_hook(struct playout_policy *playout, struct playout_setup *setup, st { struct uct_playout_callback *upc = setup->hook_data; - /* TODO: Fill me. */ + if (upc->uct->treepool_chance[mode] > fast_random(100) && upc->treepool[color - 1]) { + assert(upc->treepool_n[color - 1] > 0); + coord_t treepool_move = upc->treepool[color - 1][fast_random(upc->treepool_n[color - 1])]; + if (board_is_valid_play(b, treepool_move, color)) + return treepool_move; + } return pass; } @@ -192,10 +200,71 @@ uct_playout_postpolicy(struct playout_policy *playout, struct playout_setup *set return uct_playout_hook(playout, setup, b, color, 1); } +double +treepool_node_value(struct uct *u, struct tree *tree, int parity, struct tree_node *node) +{ + /* XXX: Playouts get cast to double */ + switch (u->treepool_type) { + case UTT_RAVE_PLAYOUTS: + return node->amaf.playouts; + case UTT_RAVE_VALUE: + return tree_node_get_value(tree, parity, node->amaf.value); + case UTT_UCT_PLAYOUTS: + return node->u.playouts; + case UTT_UCT_VALUE: + return tree_node_get_value(tree, parity, node->u.value); + case UTT_EVALUATE: + { + struct uct_descent d = { .node = node }; + assert(u->policy->evaluate); + return u->policy->evaluate(u->policy, tree, &d, parity); + } + default: assert(0); + } + return -1; +} + +static void +treepool_setup(struct uct_playout_callback *upc, struct tree_node *node, int color) +{ + struct uct *u = upc->uct; + int parity = ((node->depth ^ upc->tree->root->depth) & 1) ? -1 : 1; + + /* XXX: Naive O(N^2) way. */ + for (int i = 0; i < u->treepool_size; i++) { + /* For each item, find the highest + * node not in the pool yet. */ + struct tree_node *best = NULL; + double best_val = -1; + + // first comes pass which we skip + for (struct tree_node *ni = node->children->sibling; ni; ni = ni->sibling) { + /* Do we already have it? */ + bool have = false; + for (int j = 0; j < upc->treepool_n[color]; j++) + if (upc->treepool[color][j] == ni->coord) { + have = true; + break; + } + if (have) + continue; + + double i_val = treepool_node_value(u, upc->tree, parity, ni); + if (i_val > best_val) { + best = node; + best_val = i_val; + } + } + + if (!best) break; + upc->treepool[color][upc->treepool_n[color]++] = best->coord; + } +} + static int uct_leaf_node(struct uct *u, struct board *b, enum stone player_color, - struct playout_amafmap *amaf, + struct playout_amafmap *amaf, struct uct_descent *descent, struct tree *t, struct tree_node *n, enum stone node_color, char *spaces) { @@ -218,14 +287,39 @@ uct_leaf_node(struct uct *u, struct board *b, enum stone player_color, spaces, n->u.playouts, coord2sstr(n->coord, t->board), tree_node_get_value(t, parity, n->u.value)); - /* TODO: Don't necessarily restart the sequence walk when entering - * playout. */ - struct uct_playout_callback upc = { .uct = u, .tree = t, .lnode = NULL }; + struct uct_playout_callback upc = { + .uct = u, + .tree = t, + /* TODO: Don't necessarily restart the sequence walk when + * entering playout. */ + .lnode = NULL, + }; + if (u->local_tree_playout) { /* N.B.: We know this is ELO playout. */ playout_elo_callback(u->playout, uct_playout_probdist, &upc); } + coord_t pool[2][u->treepool_size]; + if (u->treepool_chance[0] + u->treepool_chance[1] > 0) { + for (int color = 0; color < 2; color++) { + /* Prepare tree-based pool of moves to try forcing + * during the playout. */ + /* We consider the children of the last significant + * node, picking top N choices. */ + struct tree_node *n = descent->significant[color]; + if (!n || !n->children || !n->children->sibling) { + /* No significant node, or it's childless or has + * only pass as its child. */ + upc.treepool[color] = NULL; + upc.treepool_n[color] = 0; + } else { + upc.treepool[color] = (coord_t *) &pool[color]; + treepool_setup(&upc, n, color); + } + } + } + struct playout_setup ps = { .gamelen = u->gamelen, .mercymin = u->mercymin, -- 2.11.4.GIT