From a72c00cfb7400ee73303ff9e86978b82ab9f666a Mon Sep 17 00:00:00 2001 From: Jean-loup Gailly Date: Wed, 3 Aug 2011 19:53:57 +0200 Subject: [PATCH] UCT rave: simplify the rave udpate and nakade handling The next move by the same color is now computed accurately so there is no need for special nakade handling. --- playout.c | 19 ++++------------ playout.h | 33 +++++---------------------- uct/policy/ucb1amaf.c | 62 +++++++++++++++++++++------------------------------ uct/walk.c | 58 ++++++++++++----------------------------------- 4 files changed, 49 insertions(+), 123 deletions(-) diff --git a/playout.c b/playout.c index 4db5759..b09eb1a 100644 --- a/playout.c +++ b/playout.c @@ -130,23 +130,12 @@ play_random_game(struct playout_setup *setup, if (unlikely(is_pass(coord))) { passes++; } else { - /* We don't care about nakade counters, since we want - * to avoid taking pre-nakade moves into account only - * if they happenned in the tree before nakade nodes; - * but this is always out of the tree. */ - if (amafmap) { - if (amafmap->map[coord] == S_NONE || amafmap->map[coord] == color) - amafmap->map[coord] = color; - else if (amafmap->record_nakade) - amaf_op(amafmap->map[coord], +); - amafmap->game[amafmap->gamelen].coord = coord; - amafmap->game[amafmap->gamelen].color = color; - amafmap->gamelen++; - assert(amafmap->gamelen < sizeof(amafmap->game) / sizeof(amafmap->game[0])); - } - passes = 0; } + if (amafmap) { + assert(amafmap->gamelen < MAX_GAMELEN); + amafmap->game[amafmap->gamelen++] = coord; + } if (setup->mercymin && abs(b->captures[S_BLACK] - b->captures[S_WHITE]) > setup->mercymin) break; diff --git a/playout.h b/playout.h index 2182d4e..68aff84 100644 --- a/playout.h +++ b/playout.h @@ -78,39 +78,16 @@ struct playout_setup { struct playout_amafmap { - /* Record of the random playout - for each intersection: - * S_NONE: This move was never played - * S_BLACK: This move was played by black first - * S_WHITE: This move was played by white first - */ - enum stone *map; // [board_size2(b)] - - /* the lowest &0xf is the enum stone, upper bits are nakade - * counter - in case of nakade, we record only color of the - * first stone played inside, but count further throwins - * and ignore AMAF value after these. */ -#define amaf_nakade(item_) (item_ >> 8) -#define amaf_op(item_, op_) do { \ - int mi_ = item_; \ - item_ = (mi_ & 0xf) | ((amaf_nakade(mi_) op_ 1) << 8); \ -} while (0) - - /* Additionally, we keep record of the game so that we can + /* We keep record of the game so that we can * examine nakade moves; really going out of our way to * implement nakade AMAF properly turns out to be crucial * when reading some tactical positions in depth (even if * they are just one-stone-snapback). */ - struct move game[MAX_GAMELEN + 1]; - unsigned int gamelen; + coord_t game[MAX_GAMELEN]; + int gamelen; /* Our current position in the game sequence; in AMAF, we search - * the range [game_baselen, gamelen]. */ - unsigned int game_baselen; - - /* Whether to record the nakade moves (true) or just completely - * ignore them (false; just the first color on the intersection - * is stored in the map, nakade counter is not incremented; game - * record is still kept). */ - bool record_nakade; + * the range [game_baselen, gamelen[ */ + int game_baselen; }; diff --git a/uct/policy/ucb1amaf.c b/uct/policy/ucb1amaf.c index 86757fb..fa8cd5e 100644 --- a/uct/policy/ucb1amaf.c +++ b/uct/policy/ucb1amaf.c @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -195,7 +196,14 @@ ucb1amaf_update(struct uct_policy *p, struct tree *tree, struct tree_node *node, { struct ucb1_policy_amaf *b = p->data; enum stone winner_color = result > 0.5 ? S_BLACK : S_WHITE; - enum stone child_color = stone_other(node_color); + + /* Record of the random playout - for each intersection coord, + * first_move[coord] is the index map->game of the first move + * at this coordinate, or INT_MAX if the move was not played. + * The parity gives the color of this move. + */ + int first_map[board_size2(final_board)+1]; + int *first_move = &first_map[1]; // +1 for pass #if 0 struct board bb; bb.size = 9+2; @@ -205,66 +213,48 @@ ucb1amaf_update(struct uct_policy *p, struct tree *tree, struct tree_node *node, node_color, result, player_color); #endif - while (node) { - if (node->parent == NULL) - assert(tree->root_color == stone_other(child_color)); + /* Initialize first_move */ + for (int i = pass; i < board_size2(final_board); i++) first_move[i] = INT_MAX; + int move; + assert(map->gamelen > 0); + for (move = map->gamelen - 1; move >= map->game_baselen; move--) + first_move[map->game[move]] = move; + while (node) { if (!b->crit_amaf && !is_pass(node_coord(node))) { stats_add_result(&node->winner_owner, board_local_value(b->crit_lvalue, final_board, node_coord(node), winner_color), 1); stats_add_result(&node->black_owner, board_local_value(b->crit_lvalue, final_board, node_coord(node), S_BLACK), 1); } stats_add_result(&node->u, result, 1); - if (!is_pass(node_coord(node)) && amaf_nakade(map->map[node_coord(node)])) - amaf_op(map->map[node_coord(node)], -); /* This loop ignores symmetry considerations, but they should * matter only at a point when AMAF doesn't help much. */ assert(map->game_baselen >= 0); for (struct tree_node *ni = node->children; ni; ni = ni->sibling) { - enum stone amaf_color = map->map[node_coord(ni)]; - assert(amaf_color != S_OFFBOARD); - if (amaf_color == S_NONE) + /* Use the child move only if it was first played by the same color. */ + int first = first_move[node_coord(ni)]; + if (first == INT_MAX || (first & 1) == (move & 1)) continue; - if (amaf_nakade(map->map[node_coord(ni)])) { - if (!b->check_nakade) - continue; - unsigned int i; - for (i = map->game_baselen; i < map->gamelen; i++) - if (map->game[i].coord == node_coord(ni) - && map->game[i].color == child_color) - break; - if (i == map->gamelen) - continue; - amaf_color = child_color; - } - - floating_t nres = result; - if (amaf_color != child_color) { - continue; - } - /* For child_color != player_color, we still want - * to record the result unmodified; in that case, - * we will correctly negate them at the descend phase. */ if (b->crit_amaf && !is_pass(node_coord(node))) { stats_add_result(&ni->winner_owner, board_local_value(b->crit_lvalue, final_board, node_coord(ni), winner_color), 1); stats_add_result(&ni->black_owner, board_local_value(b->crit_lvalue, final_board, node_coord(ni), S_BLACK), 1); } - stats_add_result(&ni->amaf, nres, 1); - + stats_add_result(&ni->amaf, result, 1); #if 0 struct board bb; bb.size = 9+2; fprintf(stderr, "* %s<%"PRIhash"> -> %s<%"PRIhash"> [%d/%f => %d/%f]\n", coord2sstr(node_coord(node), &bb), node->hash, coord2sstr(node_coord(ni), &bb), ni->hash, - player_color, result, child_color, nres); + player_color, result, move, result); #endif } - - if (!is_pass(node_coord(node))) { - map->game_baselen--; + if (node->parent) { + assert(move >= 0 && map->game[move] == node_coord(node)); + first_move[node_coord(node)] = move; + move--; } - node = node->parent; child_color = stone_other(child_color); + node = node->parent; } } diff --git a/uct/walk.c b/uct/walk.c index 56c0bf5..aa58fea 100644 --- a/uct/walk.c +++ b/uct/walk.c @@ -81,18 +81,11 @@ uct_progress_status(struct uct *u, struct tree *t, enum stone color, int playout } -static void -record_amaf_move(struct playout_amafmap *amaf, coord_t coord, enum stone color) +static inline void +record_amaf_move(struct playout_amafmap *amaf, coord_t coord) { - if (amaf->map[coord] == S_NONE || amaf->map[coord] == color) { - amaf->map[coord] = color; - } else { // XXX: Respect amaf->record_nakade - amaf_op(amaf->map[coord], +); - } - amaf->game[amaf->gamelen].coord = coord; - amaf->game[amaf->gamelen].color = color; - amaf->gamelen++; - assert(amaf->gamelen < sizeof(amaf->game) / sizeof(amaf->game[0])); + assert(amaf->gamelen < MAX_GAMELEN); + amaf->game[amaf->gamelen++] = coord; } @@ -274,12 +267,8 @@ uct_playout(struct uct *u, struct board *b, enum stone player_color, struct tree struct board b2; board_copy(&b2, b); - struct playout_amafmap *amaf = NULL; - if (u->policy->wants_amaf) { - amaf = calloc2(1, sizeof(*amaf)); - amaf->map = calloc2(board_size2(&b2) + 1, sizeof(*amaf->map)); - amaf->map++; // -1 is pass - } + struct playout_amafmap amaf; + amaf.gamelen = amaf.game_baselen = 0; /* Walk the tree until we find a leaf, then expand it and do * a random playout. */ @@ -365,8 +354,7 @@ uct_playout(struct uct *u, struct board *b, enum stone player_color, struct tree stats_add_result(&n->u, node_color == S_BLACK ? 0.0 : 1.0, u->virtual_loss); assert(node_coord(n) >= -1); - if (amaf && !is_pass(node_coord(n))) - record_amaf_move(amaf, node_coord(n), node_color); + record_amaf_move(&amaf, node_coord(n)); struct move m = { node_coord(n), node_color }; int res = board_play(&b2, &m); @@ -404,10 +392,7 @@ uct_playout(struct uct *u, struct board *b, enum stone player_color, struct tree tree_expand_node(t, n, &b2, next_color, u, -parity); } - if (amaf) { - amaf->game_baselen = amaf->gamelen; - amaf->record_nakade = u->playout_amaf_nakade; - } + amaf.game_baselen = amaf.gamelen; if (t->use_extra_komi && u->dynkomi->persim) { b2.komi += round(u->dynkomi->persim(u->dynkomi, &b2, t, n)); @@ -432,31 +417,20 @@ uct_playout(struct uct *u, struct board *b, enum stone player_color, struct tree } else { // assert(tree_leaf_node(n)); /* In case of parallel tree search, the assertion might * not hold if two threads chew on the same node. */ - result = uct_leaf_node(u, &b2, player_color, amaf, descent, &dlen, significant, t, n, node_color, spaces); + result = uct_leaf_node(u, &b2, player_color, &amaf, descent, &dlen, significant, t, n, node_color, spaces); } - if (amaf && u->playout_amaf_cutoff) { - unsigned int cutoff = amaf->game_baselen; - cutoff += (amaf->gamelen - amaf->game_baselen) * u->playout_amaf_cutoff / 100; - /* Now, reconstruct the amaf map. */ - memset(amaf->map, 0, board_size2(&b2) * sizeof(*amaf->map)); - for (unsigned int i = 0; i < cutoff; i++) { - coord_t coord = amaf->game[i].coord; - enum stone color = amaf->game[i].color; - if (amaf->map[coord] == S_NONE || amaf->map[coord] == color) { - amaf->map[coord] = color; - /* Nakade always recorded for in-tree part */ - } else if (amaf->record_nakade || i <= amaf->game_baselen) { - amaf_op(amaf->map[node_coord(n)], +); - } - } + if (u->policy->wants_amaf && u->playout_amaf_cutoff) { + unsigned int cutoff = amaf.game_baselen; + cutoff += (amaf.gamelen - amaf.game_baselen) * u->playout_amaf_cutoff / 100; + amaf.gamelen = cutoff; } /* Record the result. */ assert(n == t->root || n->parent); floating_t rval = scale_value(u, b, result); - u->policy->update(u->policy, t, n, node_color, player_color, amaf, &b2, rval); + u->policy->update(u->policy, t, n, node_color, player_color, &amaf, &b2, rval); if (t->use_extra_komi) { stats_add_result(&u->dynkomi->score, result / 2, 1); @@ -503,10 +477,6 @@ end: } } - if (amaf) { - free(amaf->map - 1); - free(amaf); - } board_done_noalloc(&b2); return result; } -- 2.11.4.GIT