From 22027458d13db2494ff76e415a51b100e554dea4 Mon Sep 17 00:00:00 2001 From: Petr Baudis Date: Sun, 29 Jul 2012 01:04:52 +0200 Subject: [PATCH] Support for value scaling with variable coefficient: val_bytemp, val_bytemp_min --- uct/internal.h | 2 ++ uct/uct.c | 9 +++++++++ uct/walk.c | 16 ++++++++++++---- 3 files changed, 23 insertions(+), 4 deletions(-) diff --git a/uct/internal.h b/uct/internal.h index ae25701..62bb934 100644 --- a/uct/internal.h +++ b/uct/internal.h @@ -80,6 +80,8 @@ struct uct { int val_points; bool val_extra; bool val_byavg; + bool val_bytemp; + floating_t val_bytemp_min; int random_policy_chance; bool local_tree; diff --git a/uct/uct.c b/uct/uct.c index 4aac18c..d1ebe25 100644 --- a/uct/uct.c +++ b/uct/uct.c @@ -911,6 +911,15 @@ uct_state_init(char *arg, struct board *b) * be relative to average score in the current * search episode inst. of jigo. */ u->val_byavg = !optval || atoi(optval); + } else if (!strcasecmp(optname, "val_bytemp")) { + /* If true, the value scaling coefficient + * is different based on value extremity + * (dist. from 0.5), linear between + * val_bytemp_min, val_scale. */ + u->val_bytemp = !optval || atoi(optval); + } else if (!strcasecmp(optname, "val_bytemp_min") && optval) { + /* Minimum val_scale in case of val_bytemp. */ + u->val_bytemp_min = atof(optval); /** Local trees */ /* (Purely experimental. Does not work - yet!) */ diff --git a/uct/walk.c b/uct/walk.c index 278bc25..2ad2c63 100644 --- a/uct/walk.c +++ b/uct/walk.c @@ -268,7 +268,7 @@ uct_leaf_node(struct uct *u, struct board *b, enum stone player_color, } static floating_t -scale_value(struct uct *u, struct board *b, int result) +scale_value(struct uct *u, struct board *b, enum stone node_color, struct tree_node *significant[2], int result) { floating_t rval = result > 0 ? 1.0 : result < 0 ? 0.0 : 0.5; if (u->val_scale && result != 0) { @@ -278,6 +278,14 @@ scale_value(struct uct *u, struct board *b, int result) result -= u->t->avg_score.value * 2; } + double scale = u->val_scale; + if (u->val_bytemp) { + /* xvalue is 0 at 0.5, 1 at 0 or 1 */ + /* No correction for parity necessary. */ + double xvalue = significant[node_color] ? fabs(significant[node_color]->u.value - 0.5) * 2 : 0; + scale = u->val_bytemp_min + (u->val_scale - u->val_bytemp_min) * xvalue; + } + int vp = u->val_points; if (!vp) { vp = board_size(b) - 1; vp *= vp; vp *= 2; @@ -287,9 +295,9 @@ scale_value(struct uct *u, struct board *b, int result) sval = sval > 1 ? 1 : sval; if (result < 0) sval = 1 - sval; if (u->val_extra) - rval += u->val_scale * sval; + rval += scale * sval; else - rval = (1 - u->val_scale) * rval + u->val_scale * sval; + rval = (1 - scale) * rval + scale * sval; // fprintf(stderr, "score %d => sval %f, rval %f\n", result, sval, rval); } return rval; @@ -557,7 +565,7 @@ uct_playout(struct uct *u, struct board *b, enum stone player_color, struct tree /* Record the result. */ assert(n == t->root || n->parent); - floating_t rval = scale_value(u, b, result); + floating_t rval = scale_value(u, b, node_color, significant, result); u->policy->update(u->policy, t, n, node_color, player_color, &amaf, &b2, rval); stats_add_result(&t->avg_score, result / 2, 1); -- 2.11.4.GIT