From f369591391a93294acb118dadebed6cc20ecea99 Mon Sep 17 00:00:00 2001 From: Jean-loup Gailly Date: Sun, 11 Mar 2012 00:26:15 +0100 Subject: [PATCH] Give 0 or negative rave bonus to ko threats before taking the ko. --- playout.c | 1 + playout.h | 1 + uct/policy/ucb1amaf.c | 28 +++++++++++++++++++++++++--- uct/walk.c | 9 +++++---- 4 files changed, 32 insertions(+), 7 deletions(-) diff --git a/playout.c b/playout.c index b09eb1a..048f186 100644 --- a/playout.c +++ b/playout.c @@ -134,6 +134,7 @@ play_random_game(struct playout_setup *setup, } if (amafmap) { assert(amafmap->gamelen < MAX_GAMELEN); + amafmap->is_ko_capture[amafmap->gamelen] = board_playing_ko_threat(b); amafmap->game[amafmap->gamelen++] = coord; } diff --git a/playout.h b/playout.h index 68aff84..bc13f2d 100644 --- a/playout.h +++ b/playout.h @@ -84,6 +84,7 @@ struct playout_amafmap { * when reading some tactical positions in depth (even if * they are just one-stone-snapback). */ coord_t game[MAX_GAMELEN]; + bool is_ko_capture[MAX_GAMELEN]; int gamelen; /* Our current position in the game sequence; in AMAF, we search * the range [game_baselen, gamelen[ */ diff --git a/uct/policy/ucb1amaf.c b/uct/policy/ucb1amaf.c index e3f377c..12498c0 100644 --- a/uct/policy/ucb1amaf.c +++ b/uct/policy/ucb1amaf.c @@ -35,6 +35,9 @@ struct ucb1_policy_amaf { bool sylvain_rave; /* Give more weight to moves played earlier. */ int distance_rave; + /* Give 0 or negative rave bonus to ko threats before taking the ko. + 0=no bonus, 1=invert rave bonus, 2=double penalty, etc... */ + int threat_rave; /* Coefficient of local tree values embedded in RAVE. */ floating_t ltree_rave; /* Coefficient of criticality embedded in RAVE. 
*/ @@ -247,6 +250,8 @@ ucb1amaf_update(struct uct_policy *p, struct tree *tree, struct tree_node *node, } stats_add_result(&node->u, result, 1); + bool capturing_ko = move + 1 < map->gamelen && map->is_ko_capture[move+1]; + /* This loop ignores symmetry considerations, but they should * matter only at a point when AMAF doesn't help much. */ assert(map->game_baselen >= 0); @@ -260,12 +265,26 @@ ucb1amaf_update(struct uct_policy *p, struct tree *tree, struct tree_node *node, int distance = first - (move + 1); if (distance & 1) continue; - /* Give more weight to moves played earlier */ int weight = 1; - if (b->distance_rave != 0) { + floating_t res = result; + + /* Don't give amaf bonus to a ko threat before taking the ko. + * http://www.grappa.univ-lille3.fr/~coulom/Aja_PhD_Thesis.pdf + * move+1: B captures a ko + * move+2: W plays a ko threat + * move+3: B answers ko threat + * move+4: W re-captures the ko + * move+5: B plays a ko threat + * then do not give an amaf bonus to this threat at level move+1, prefer taking ko. 
+ */ + if (capturing_ko && distance == 4 && map->is_ko_capture[move+4]) { + weight = b->threat_rave; + res = 1.0 - res; + } else if (b->distance_rave != 0) { + /* Give more weight to moves played earlier */ weight += b->distance_rave * (map->gamelen - first) / (map->gamelen - move); } - stats_add_result(&ni->amaf, result, weight); + stats_add_result(&ni->amaf, res, weight); if (b->crit_amaf) { stats_add_result(&ni->winner_owner, board_local_value(b->crit_lvalue, final_board, node_coord(ni), winner_color), 1); @@ -308,6 +327,7 @@ policy_ucb1amaf_init(struct uct *u, char *arg, struct board *board) b->fpu = INFINITY; b->sylvain_rave = true; b->distance_rave = 3; + b->threat_rave = 0; b->ltree_rave = 0.75f; b->crit_rave = 1.1f; @@ -339,6 +359,8 @@ policy_ucb1amaf_init(struct uct *u, char *arg, struct board *board) b->sylvain_rave = !optval || *optval == '1'; } else if (!strcasecmp(optname, "distance_rave") && optval) { b->distance_rave = atoi(optval); + } else if (!strcasecmp(optname, "threat_rave") && optval) { + b->threat_rave = atoi(optval); } else if (!strcasecmp(optname, "ltree_rave") && optval) { b->ltree_rave = atof(optval); } else if (!strcasecmp(optname, "crit_rave") && optval) { diff --git a/uct/walk.c b/uct/walk.c index fb5683b..3c913e9 100644 --- a/uct/walk.c +++ b/uct/walk.c @@ -181,9 +181,10 @@ uct_progress_status(struct uct *u, struct tree *t, enum stone color, int playout static inline void -record_amaf_move(struct playout_amafmap *amaf, coord_t coord) +record_amaf_move(struct playout_amafmap *amaf, coord_t coord, bool is_ko_capture) { assert(amaf->gamelen < MAX_GAMELEN); + amaf->is_ko_capture[amaf->gamelen] = is_ko_capture; amaf->game[amaf->gamelen++] = coord; } @@ -468,9 +469,6 @@ uct_playout(struct uct *u, struct board *b, enum stone player_color, struct tree if (u->virtual_loss) stats_add_result(&n->u, node_color == S_BLACK ? 
0.0 : 1.0, u->virtual_loss); - assert(node_coord(n) >= -1); - record_amaf_move(&amaf, node_coord(n)); - struct move m = { node_coord(n), node_color }; int res = board_play(&b2, &m); @@ -488,6 +486,9 @@ uct_playout(struct uct *u, struct board *b, enum stone player_color, struct tree goto end; } + assert(node_coord(n) >= -1); + record_amaf_move(&amaf, node_coord(n), board_playing_ko_threat(&b2)); + if (is_pass(node_coord(n))) passes++; else -- 2.11.4.GIT