From c6ce1f2bb4f7fdc8973015204ca051d90bbc4d26 Mon Sep 17 00:00:00 2001 From: Petr Baudis Date: Fri, 12 Feb 2010 04:48:21 +0100 Subject: [PATCH] UCT bestr_ratio: Implement to deal with confused playouts --- uct/internal.h | 2 +- uct/uct.c | 27 +++++++++++++++++++++++++-- 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/uct/internal.h b/uct/internal.h index 03b6f62..32379f1 100644 --- a/uct/internal.h +++ b/uct/internal.h @@ -22,7 +22,7 @@ struct uct { int games, gamelen; float resign_ratio; float loss_threshold; - double best2_ratio; + double best2_ratio, bestr_ratio; bool pass_all_alive; int expand_p; bool playout_amaf, playout_amaf_nakade; diff --git a/uct/uct.c b/uct/uct.c index f68acd7..863021d 100644 --- a/uct/uct.c +++ b/uct/uct.c @@ -1,4 +1,5 @@ #include <assert.h> +#include <math.h> #include <pthread.h> #include <signal.h> #include <stdio.h> @@ -483,7 +484,7 @@ uct_search_stop_early(struct uct *u, struct tree *t, struct board *b, static bool uct_search_keep_looking(struct uct *u, struct tree *t, struct board *b, struct tree_node *best, struct tree_node *best2, - struct tree_node *winner, int i) + struct tree_node *bestr, struct tree_node *winner, int i) { if (!best) { if (UDEBUGL(2)) @@ -505,6 +506,20 @@ uct_search_keep_looking(struct uct *u, struct tree *t, struct board *b, } } + if (u->bestr_ratio > 0) { + /* Check best, best_best value difference. If the best move + * and its best child do not give similar enough results, + * keep simulating. */ + if (bestr && bestr->u.playouts + && fabs((double)best->u.value - bestr->u.value) > u->bestr_ratio) { + if (UDEBUGL(2)) + fprintf(stderr, "Bestr delta %f > threshold %f\n", + fabs((double)best->u.value - bestr->u.value), + u->bestr_ratio); + return true; + } + } + if (winner && winner != best) { /* Keep simulating if best explored * does not have also highest value. */ @@ -554,6 +569,7 @@ uct_search(struct uct *u, struct board *b, struct time_info *ti, enum stone colo struct tree_node *best = NULL; struct tree_node *best2 = NULL; // Second-best move. 
+ struct tree_node *bestr = NULL; // best's best child. struct tree_node *winner = NULL; double busywait_interval = TREE_BUSYWAIT_INTERVAL; @@ -602,7 +618,9 @@ uct_search(struct uct *u, struct board *b, struct time_info *ti, enum stone colo if (desired_done) { if (u->policy->winner && u->policy->evaluate) winner = u->policy->winner(u->policy, ctx->t, ctx->t->root); - if (!uct_search_keep_looking(u, ctx->t, b, best, best2, winner, i)) + if (best) + bestr = u->policy->choose(u->policy, best, b, stone_other(color), resign); + if (!uct_search_keep_looking(u, ctx->t, b, best, best2, bestr, winner, i)) break; } @@ -831,6 +849,11 @@ uct_state_init(char *arg, struct board *b) * first_best/second_best playouts ratio * is less than best2_ratio. */ u->best2_ratio = atof(optval); + } else if (!strcasecmp(optname, "bestr_ratio") && optval) { + /* If set, prolong simulating while + * best,best_best_child values delta + * is more than bestr_ratio. */ + u->bestr_ratio = atof(optval); } else if (!strcasecmp(optname, "playout_amaf")) { /* Whether to include random playout moves in * AMAF as well. (Otherwise, only tree moves -- 2.11.4.GIT