From 0e04211ede3794e28a274fee8272b12ef2b2e8a0 Mon Sep 17 00:00:00 2001 From: Petr Baudis Date: Mon, 25 Apr 2011 02:22:44 +0200 Subject: [PATCH] UCB1AMAF: Add criticality support, with many tuning options crit_rave=2, crit_min_playouts=32, crit_negative=1, crit_amaf=1 --- uct/policy/ucb1amaf.c | 44 +++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 41 insertions(+), 3 deletions(-) diff --git a/uct/policy/ucb1amaf.c b/uct/policy/ucb1amaf.c index 727b68c..93e028e 100644 --- a/uct/policy/ucb1amaf.c +++ b/uct/policy/ucb1amaf.c @@ -29,6 +29,11 @@ struct ucb1_policy_amaf { bool sylvain_rave; /* Coefficient of local tree values embedded in RAVE. */ floating_t ltree_rave; + /* Coefficient of criticality embedded in RAVE. */ + floating_t crit_rave; + int crit_min_playouts; + bool crit_negative; + bool crit_amaf; }; @@ -99,6 +104,22 @@ ucb1rave_evaluate(struct uct_policy *p, struct tree *tree, struct uct_descent *d stats_merge(&r, &l); } + /* Criticality heuristics. */ + if (b->crit_rave > 0 && node->u.playouts > b->crit_min_playouts) { + floating_t crit = tree_node_criticality(tree, node); + if (b->crit_negative || crit > 0) { + struct move_stats c = { + .value = tree_node_get_value(tree, parity, 1.0f), + .playouts = crit * r.playouts * b->crit_rave + }; + LTREE_DEBUG fprintf(stderr, "[crit] adding %f%%%d to [%s] RAVE %f%%%d\n", + c.value, c.playouts, + coord2sstr(node->coord, tree->board), r.value, r.playouts); + stats_merge(&r, &c); + } + } + + floating_t value = 0; if (n.playouts) { if (r.playouts) { @@ -172,13 +193,14 @@ ucb1amaf_update(struct uct_policy *p, struct tree *tree, struct tree_node *node, if (node->parent == NULL) assert(tree->root_color == stone_other(child_color)); + if (!b->crit_amaf) { + stats_add_result(&node->winner_owner, board_at(final_board, node->coord) == winner_color ? 1.0 : 0.0, 1); + stats_add_result(&node->black_owner, board_at(final_board, node->coord) == S_BLACK ? 1.0 : 0.0, 1); + } stats_add_result(&node->u, result, 1); if (amaf_nakade(map->map[node->coord])) amaf_op(map->map[node->coord], -); - stats_add_result(&node->winner_owner, board_at(final_board, node->coord) == winner_color ? 1.0 : 0.0, 1); - stats_add_result(&node->black_owner, board_at(final_board, node->coord) == S_BLACK ? 1.0 : 0.0, 1); - /* This loop ignores symmetry considerations, but they should * matter only at a point when AMAF doesn't help much. */ assert(map->game_baselen >= 0); @@ -208,6 +230,10 @@ ucb1amaf_update(struct uct_policy *p, struct tree *tree, struct tree_node *node, * to record the result unmodified; in that case, * we will correctly negate them at the descend phase. */ + if (b->crit_amaf) { + stats_add_result(&ni->winner_owner, board_at(final_board, ni->coord) == winner_color ? 1.0 : 0.0, 1); + stats_add_result(&ni->black_owner, board_at(final_board, ni->coord) == S_BLACK ? 1.0 : 0.0, 1); + } stats_add_result(&ni->amaf, nres, 1); #if 0 @@ -247,6 +273,10 @@ policy_ucb1amaf_init(struct uct *u, char *arg) b->check_nakade = true; b->sylvain_rave = true; b->ltree_rave = 0.75f; + b->crit_rave = 2.0f; + b->crit_min_playouts = 32; + b->crit_negative = 1; + b->crit_amaf = 1; if (arg) { char *optspec, *next = arg; @@ -271,6 +301,14 @@ policy_ucb1amaf_init(struct uct *u, char *arg) b->check_nakade = !optval || *optval == '1'; } else if (!strcasecmp(optname, "ltree_rave") && optval) { b->ltree_rave = atof(optval); + } else if (!strcasecmp(optname, "crit_rave") && optval) { + b->crit_rave = atof(optval); + } else if (!strcasecmp(optname, "crit_min_playouts") && optval) { + b->crit_min_playouts = atoi(optval); + } else if (!strcasecmp(optname, "crit_negative")) { + b->crit_negative = !optval || *optval == '1'; + } else if (!strcasecmp(optname, "crit_amaf")) { + b->crit_amaf = !optval || *optval == '1'; } else { fprintf(stderr, "ucb1amaf: Invalid policy argument %s or missing value\n", optname); -- 2.11.4.GIT