From 2328b9f4ceea5c96065b2439b775908aaf3bcf2e Mon Sep 17 00:00:00 2001 From: Petr Baudis Date: Fri, 19 Mar 2010 15:05:37 +0100 Subject: [PATCH] UCT dynkomi: Also track value in resettable movestats --- uct/dynkomi.c | 15 +++++++++------ uct/tree.h | 12 ++++++------ uct/walk.c | 1 + 3 files changed, 16 insertions(+), 12 deletions(-) diff --git a/uct/dynkomi.c b/uct/dynkomi.c index c7e5663..007dccb 100644 --- a/uct/dynkomi.c +++ b/uct/dynkomi.c @@ -230,9 +230,13 @@ komi_by_score(struct dynkomi_adaptive *a, struct board *b, struct tree *tree) static float komi_by_value(struct dynkomi_adaptive *a, struct board *b, struct tree *tree) { - if (tree->root->u.playouts < TRUSTWORTHY_KOMI_PLAYOUTS) + if (tree->value.playouts < TRUSTWORTHY_KOMI_PLAYOUTS) return tree->extra_komi; + struct move_stats value = tree->value; + /* Almost-reset tree->value to gather fresh stats. */ + tree->value.playouts = 1; + /* We have three "value zones": * red zone | yellow zone | green zone * ~45% ~60% @@ -249,19 +253,18 @@ komi_by_value(struct dynkomi_adaptive *a, struct board *b, struct tree *tree) * to try to reduce extra komi we take. * * TODO: Make the latch expire after a while. */ - float value = tree->root->u.value; float extra_komi = tree->extra_komi; - if (value < a->zone_red) { + if (value.value < a->zone_red) { /* Red zone. Take extra komi. */ if (DEBUGL(3)) fprintf(stderr, "[red] %f, komi latch %f -> %f\n", - value, a->komi_latch, extra_komi); + value.value, a->komi_latch, extra_komi); if (extra_komi > 0) a->komi_latch = extra_komi; extra_komi -= a->score_step; // XXX: we depend on being black return extra_komi; - } else if (value < a->zone_green) { + } else if (value.value < a->zone_green) { /* Yellow zone, do nothing. */ return extra_komi; @@ -270,7 +273,7 @@ komi_by_value(struct dynkomi_adaptive *a, struct board *b, struct tree *tree) extra_komi += a->score_step; // XXX: we depend on being black if (DEBUGL(3)) fprintf(stderr, "[green] %f, += %d | komi latch %f\n", - value, a->score_step, a->komi_latch); + value.value, a->score_step, a->komi_latch); return !a->use_komi_latch || extra_komi < a->komi_latch ? extra_komi : a->komi_latch - 1; } } diff --git a/uct/tree.h b/uct/tree.h index 78acaaa..2ba9edf 100644 --- a/uct/tree.h +++ b/uct/tree.h @@ -95,13 +95,13 @@ struct tree { * based on leaf node depth. */ float extra_komi; - /* Information on average score at the game end (from black's - * perspective). */ - /* N.B.: Some users may need average score carried over from previous - * move to initialize. Thus, this information is NOT zeroed on node - * promotion, you are responsible for zeroing it for yourself if you - * use it. */ + /*** Only for dynkomi use: */ + /* Information on average score at the simulation end (black's + * perspective) since last dynkomi adjustment. */ struct move_stats score; + /* Information on average winrate of simulations since last + * dynkomi adjustment. */ + struct move_stats value; /* We merge local (non-tenuki) sequences for both colors, occuring * anywhere in the tree; nodes are created on-demand, special 'pass' diff --git a/uct/walk.c b/uct/walk.c index a68bdf0..2e6a15f 100644 --- a/uct/walk.c +++ b/uct/walk.c @@ -438,6 +438,7 @@ uct_playout(struct uct *u, struct board *b, enum stone player_color, struct tree stats_add_result(&t->score, result / 2, 1); float rval = scale_value(u, b, result); + stats_add_result(&t->value, rval, 1); u->policy->update(u->policy, t, n, node_color, player_color, amaf, rval); if (u->local_tree && n->parent && !is_pass(n->coord) && dlen > 0) { -- 2.11.4.GIT