From 8743ea5ca7def209250011032670d74a1c86ba16 Mon Sep 17 00:00:00 2001 From: Jean-loup Gailly Date: Thu, 21 Jul 2011 16:19:36 +0200 Subject: [PATCH] UCT: introduce val_scale_max Give more weight to territory when winning big (maximize win). This reduces the number of silly moves and makes the game more enjoyable for humans. --- uct/internal.h | 2 +- uct/uct.c | 8 ++++++-- uct/walk.c | 39 ++++++++++++++++++++++++--------------- 3 files changed, 31 insertions(+), 18 deletions(-) diff --git a/uct/internal.h b/uct/internal.h index 2aefd17..fdd041a 100644 --- a/uct/internal.h +++ b/uct/internal.h @@ -64,7 +64,7 @@ struct uct { int dynkomi_interval; struct uct_dynkomi *dynkomi; - floating_t val_scale; + floating_t val_scale, val_scale_max; int val_points; bool val_extra; diff --git a/uct/uct.c b/uct/uct.c index 9234e0b..b2010d1 100644 --- a/uct/uct.c +++ b/uct/uct.c @@ -558,7 +558,7 @@ uct_state_init(char *arg, struct board *b) u->best2_ratio = 2.5; u->max_maintime_ratio = 8.0; - u->val_scale = 0.04; u->val_points = 40; + u->val_scale = 0.1; u->val_scale_max = 0.2; u->dynkomi_interval = 1000; u->dynkomi_mask = S_BLACK | S_WHITE; @@ -865,9 +865,13 @@ uct_state_init(char *arg, struct board *b) /* How much of the game result value should be * influenced by win size. Zero means it isn't. */ u->val_scale = atof(optval); + } else if (!strcasecmp(optname, "val_scale_max") && optval) { + /* Value of the result scale when winning big + * (above 1 - resign_threshold) */ + u->val_scale_max = atof(optval); } else if (!strcasecmp(optname, "val_points") && optval) { /* Maximum size of win to be scaled into game - * result value. Zero means boardsize^2. */ + * result value. Zero means boardsize^2/10. */ u->val_points = atoi(optval) * 2; // result values are doubled } else if (!strcasecmp(optname, "val_extra")) { /* If false, the score coefficient will be simply diff --git a/uct/walk.c b/uct/walk.c index 7bc1e3a..2da7218 100644 --- a/uct/walk.c +++ b/uct/walk.c @@ -181,22 +181,31 @@ uct_leaf_node(struct uct *u, struct board *b, enum stone player_color, static floating_t scale_value(struct uct *u, struct board *b, int result) { - floating_t rval = result > 0 ? 1.0 : result < 0 ? 0.0 : 0.5; - if (u->val_scale && result != 0) { - int vp = u->val_points; - if (!vp) { - vp = board_size(b) - 1; vp *= vp; vp *= 2; - } - - floating_t sval = (floating_t) abs(result) / vp; - sval = sval > 1 ? 1 : sval; - if (result < 0) sval = 1 - sval; - if (u->val_extra) - rval += u->val_scale * sval; - else - rval = (1 - u->val_scale) * rval + u->val_scale * sval; - // fprintf(stderr, "score %d => sval %f, rval %f\n", result, sval, rval); + if (result == 0) return 0.5; + floating_t rval = result > 0 ? 1.0 : 0.0; + + floating_t scale = u->val_scale; + /* Give more weight to territory when winning big (maximize win). This reduces + * the number of silly moves and makes the game more enjoyable for humans. */ + if (u->t->root->u.playouts > GJ_MINGAMES && + tree_node_get_value(u->t, -1, u->t->root->u.value) >= u->sure_win_threshold) { + scale = u->val_scale_max; } + if (scale == 0) return rval; + + int vp = u->val_points; + /* By default do not try to win by more than 44 points on 19x19, + * 12 points on 9x9. Remember that result here is twice the score. */ + if (!vp) vp = board_size2(b) / 5; + + floating_t sval = (floating_t) abs(result) / vp; + sval = sval > 1 ? 1 : sval; + if (result < 0) sval = 1 - sval; + if (u->val_extra) + rval += scale * sval; + else + rval = (1 - scale) * rval + scale * sval; + // fprintf(stderr, "score %d => sval %f, rval %f\n", result, sval, rval); return rval; } -- 2.11.4.GIT