From 659462be7f545ea5057aa52dc14da3ee153e8250 Mon Sep 17 00:00:00 2001 From: Petr Baudis Date: Fri, 2 Oct 2009 21:12:29 +0200 Subject: [PATCH] UCT Prior: Support for cfgd prior - group-metric distance --- tactics.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ tactics.h | 8 ++++++++ uct/internal.h | 2 +- uct/prior.c | 26 ++++++++++++++++++++++++++ uct/uct.c | 5 ++++- 5 files changed, 88 insertions(+), 2 deletions(-) diff --git a/tactics.c b/tactics.c index 12a340e..4d447f6 100644 --- a/tactics.c +++ b/tactics.c @@ -314,3 +314,52 @@ board_stone_radar(struct board *b, coord_t coord, int distance) } return false; } + +void +cfg_distances(struct board *b, coord_t start, int *distances, int maxdist) +{ + /* Queue for d+1 spots; no two spots of the same group + * should appear in the queue. */ +#define qinc(x) (x = ((x + 1) >= board_size2(b) ? ((x) + 1 - board_size2(b)) : (x) + 1)) + coord_t queue[board_size2(b)]; int qstart = 0, qstop = 0; + + foreach_point(b) { + distances[c] = board_at(b, c) == S_OFFBOARD ? maxdist + 1 : -1; + } foreach_point_end; + + queue[qstop++] = start; + for (int d = 0; d <= maxdist; d++) { + /* Process queued moves, while setting the queue + * for new wave. */ + int qa = qstart, qb = qstop; + qstart = qstop; + for (int q = qa; q < qb; qinc(q)) { +#define cfg_one(coord, grp) do {\ + distances[coord] = d; \ + foreach_neighbor (b, coord, { \ + if (distances[c] < 0 && (!grp || group_at(b, coord) != grp)) { \ + queue[qstop] = c; \ + qinc(qstop); \ + } \ + }); \ +} while (0) + coord_t cq = queue[q]; + if (distances[cq] >= 0) + continue; /* We already looked here. */ + if (board_at(b, cq) == S_NONE) { + cfg_one(cq, 0); + } else { + group_t g = group_at(b, cq); + foreach_in_group(b, g) { + cfg_one(c, g); + } foreach_in_group_end; + } +#undef cfg_one + } + } + + foreach_point(b) { + if (distances[c] < 0) + distances[c] = maxdist + 1; + } foreach_point_end; +} diff --git a/tactics.h b/tactics.h index 3c70422..c893acd 100644 --- a/tactics.h +++ b/tactics.h @@ -15,6 +15,14 @@ static bool is_bad_selfatari(struct board *b, enum stone color, coord_t to); /* Checks if there are any stones in n-vincinity of coord. */ bool board_stone_radar(struct board *b, coord_t coord, int distance); +/* Construct a "common fate graph" from given coordinate; that is, a weighted + * graph of intersections where edges between all neighbors have weight 1, + * but edges between neighbors of same color have weight 0. Thus, this is + * "stone chain" metric in a sense. */ +/* The output are distanes from start stored in given [board_size2()] array; + * intersections further away than maxdist have all distance maxdist+1 set. */ +void cfg_distances(struct board *b, coord_t start, int *distances, int maxdist); + bool is_bad_selfatari_slow(struct board *b, enum stone color, coord_t to); static inline bool diff --git a/uct/internal.h b/uct/internal.h index b40e94d..befe21e 100644 --- a/uct/internal.h +++ b/uct/internal.h @@ -31,7 +31,7 @@ struct uct { /* Equivalent experience for prior knowledge. MoGo paper recommends * 50 playouts per source; in practice, esp. with RAVE, about 6 * playouts per source seems best. */ - int eqex, even_eqex, gp_eqex, policy_eqex, b19_eqex; + int eqex, even_eqex, gp_eqex, policy_eqex, b19_eqex, cfgd_eqex; struct uct_policy *policy; struct tree *t; diff --git a/uct/prior.c b/uct/prior.c index de470a0..2a5e396 100644 --- a/uct/prior.c +++ b/uct/prior.c @@ -101,6 +101,30 @@ uct_prior_playout(struct uct *u, struct tree_node *node, struct prior_map *map) } void +uct_prior_cfgd(struct uct *u, struct tree_node *node, struct prior_map *map) +{ + /* Q_{common_fate_graph_distance} */ + /* Give bonus to moves local to the last move, where "local" means + * local in terms of groups, not just manhattan distance. */ + if (is_pass(map->b->last_move.coord)) + return; + + int distances[board_size2(map->b)]; + cfg_distances(map->b, map->b->last_move.coord, distances, 3); + foreach_point(map->b) { + if (!map->consider[c]) + continue; + // fprintf(stderr, "distance %s-%s: %d\n", coord2sstr(map->b->last_move.coord, map->b), coord2sstr(c, map->b), distances[c]); + if (distances[c] > 3) + continue; + assert(distances[c] != 0); + int bonuses[] = { 0, u->cfgd_eqex, u->cfgd_eqex / 2, u->cfgd_eqex / 2 }; + int bonus = bonuses[distances[c]]; + add_prior_value(map, c, bonus, bonus); + } foreach_point_end; +} + +void uct_prior(struct uct *u, struct tree_node *node, struct prior_map *map) { if (u->even_eqex) @@ -112,4 +136,6 @@ uct_prior(struct uct *u, struct tree_node *node, struct prior_map *map) uct_prior_grandparent(u, node, map); if (u->policy_eqex) uct_prior_playout(u, node, map); + if (u->cfgd_eqex) + uct_prior_cfgd(u, node, map); } diff --git a/uct/uct.c b/uct/uct.c index b268ef7..b604e1e 100644 --- a/uct/uct.c +++ b/uct/uct.c @@ -510,7 +510,7 @@ uct_state_init(char *arg) u->amaf_prior = true; // gp: 14 vs 0: 44% (+-3.5) - u->gp_eqex = u->b19_eqex = 0; + u->gp_eqex = u->b19_eqex = u->cfgd_eqex = 0; u->even_eqex = u->policy_eqex = -1; u->eqex = 6; /* Even number! */ @@ -598,6 +598,8 @@ uct_state_init(char *arg) u->policy_eqex = atoi(optval); } else if (!strcasecmp(optname, "prior_b19") && optval) { u->b19_eqex = atoi(optval); + } else if (!strcasecmp(optname, "prior_cfgd") && optval) { + u->cfgd_eqex = atoi(optval); } else if (!strcasecmp(optname, "amaf_prior") && optval) { u->amaf_prior = atoi(optval); } else if (!strcasecmp(optname, "threads") && optval) { @@ -616,6 +618,7 @@ uct_state_init(char *arg) if (u->gp_eqex < 0) u->gp_eqex = u->eqex; if (u->policy_eqex < 0) u->policy_eqex = u->eqex; if (u->b19_eqex < 0) u->b19_eqex = u->eqex; + if (u->cfgd_eqex < 0) u->cfgd_eqex = u->eqex; u->resign_ratio = 0.2; /* Resign when most games are lost. */ u->loss_threshold = 0.85; /* Stop reading if after at least 5000 playouts this is best value. */ -- 2.11.4.GIT