From 0047f19e0a2332a30d4d0b214fb2d07dcea71dfe Mon Sep 17 00:00:00 2001 From: Petr Baudis Date: Fri, 4 Apr 2008 03:33:40 +0200 Subject: [PATCH] UCT: Keep prior moves in separate stats This keeps, e.g., the grandparent heuristic (Q_{grandparent}) from picking up prior noise. --- uct/policy/ucb1.c | 20 ++++++++++---------- uct/policy/ucb1amaf.c | 2 +- uct/tree.h | 1 + 3 files changed, 12 insertions(+), 11 deletions(-) diff --git a/uct/policy/ucb1.c b/uct/policy/ucb1.c index da66c27..28b4fe4 100644 --- a/uct/policy/ucb1.c +++ b/uct/policy/ucb1.c @@ -81,8 +81,8 @@ ucb1_prior(struct uct_policy *p, struct tree *tree, struct tree_node *node, stru #if 0 /* Q_{even} */ /* This somehow does not work at all. */ - node->u.playouts += p->eqex; - node->u.wins += p->eqex / 2; + node->prior.playouts += p->eqex; + node->prior.wins += p->eqex / 2; #endif /* Q_{grandparent} */ @@ -91,8 +91,8 @@ ucb1_prior(struct uct_policy *p, struct tree *tree, struct tree_node *node, stru for (struct tree_node *ni = gpp->children; ni; ni = ni->sibling) { /* Be careful not to emphasize too random results. 
*/ if (ni->coord == node->coord && ni->u.playouts > pp->eqex) { - node->u.playouts += pp->eqex; - node->u.wins += pp->eqex * ni->u.wins / ni->u.playouts; + node->prior.playouts += pp->eqex; + node->prior.wins += pp->eqex * ni->u.wins / ni->u.playouts; node->hints |= 1; } } @@ -108,15 +108,15 @@ ucb1_prior(struct uct_policy *p, struct tree *tree, struct tree_node *node, stru if (!isnan(assess)) { if (parity < 0) assess = 1 - assess; - node->u.playouts += pp->eqex; - node->u.wins += pp->eqex * assess; + node->prior.playouts += pp->eqex; + node->prior.wins += pp->eqex * assess; node->hints |= 2; } - if (node->u.playouts) - node->u.value = (float) node->u.wins / node->u.playouts; + if (node->prior.playouts) + node->prior.value = (float) node->prior.wins / node->prior.playouts; - //fprintf(stderr, "%s,%s prior: %d/%d = %f (%f)\n", coord2sstr(node->parent->coord, b), coord2sstr(node->coord, b), node->u.wins, node->u.playouts, node->u.value, assess); + //fprintf(stderr, "%s,%s prior: %d/%d = %f (%f)\n", coord2sstr(node->parent->coord, b), coord2sstr(node->coord, b), node->prior.wins, node->prior.playouts, node->prior.value, assess); } void @@ -129,7 +129,7 @@ ucb1_update(struct uct_policy *p, struct tree_node *node, enum stone color, stru for (; node; node = node->parent) { node->u.playouts++; node->u.wins += result; - node->u.value = (float)node->u.wins / node->u.playouts; + node->u.value = ((float)node->u.wins + (float)node->prior.wins) / (node->u.playouts + node->prior.playouts); } } diff --git a/uct/policy/ucb1amaf.c b/uct/policy/ucb1amaf.c index 2443824..eef4fdd 100644 --- a/uct/policy/ucb1amaf.c +++ b/uct/policy/ucb1amaf.c @@ -39,7 +39,7 @@ update_node(struct uct_policy *p, struct tree_node *node, int result) { node->u.playouts++; node->u.wins += result; - node->u.value = (float)node->u.wins / node->u.playouts; + node->u.value = ((float)node->u.wins + (float)node->prior.wins) / (node->u.playouts + node->prior.playouts); } void diff --git a/uct/tree.h b/uct/tree.h 
index 2d9eced..d1b4238 100644 --- a/uct/tree.h +++ b/uct/tree.h @@ -35,6 +35,7 @@ struct tree_node { coord_t coord; struct move_stats u; + struct move_stats prior; int hints; }; -- 2.11.4.GIT