From 09605298f561841cdc24bd4d44ce05faa03ec872 Mon Sep 17 00:00:00 2001 From: Petr Baudis Date: Fri, 4 Apr 2008 14:49:55 +0200 Subject: [PATCH] UCB: Account for prior values properly when descending early --- uct/policy/ucb1.c | 4 +++- uct/policy/ucb1amaf.c | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/uct/policy/ucb1.c b/uct/policy/ucb1.c index 1b90dc5..67f36b8 100644 --- a/uct/policy/ucb1.c +++ b/uct/policy/ucb1.c @@ -63,7 +63,9 @@ ucb1_descend(struct uct_policy *p, struct tree *tree, struct tree_node *node, in /* Do not consider passing early. */ if (likely(!allow_pass) && unlikely(is_pass(ni->coord))) continue; - float urgency = ni->u.playouts ? (parity > 0 ? ni->u.value : 1 - ni->u.value) + sqrt(xpl / ni->u.playouts) : b->fpu; + ni->prior.value = (float)ni->prior.wins / ni->prior.playouts; + float priorp = (parity > 0 ? ni->prior.value : 1- ni->prior.value); + float urgency = ni->u.playouts ? (parity > 0 ? ni->u.value : 1 - ni->u.value) + sqrt(xpl / ni->u.playouts) : ni->prior.playouts ? priorp : b->fpu; if (urgency > best_urgency) { best_urgency = urgency; nbest = ni; diff --git a/uct/policy/ucb1amaf.c b/uct/policy/ucb1amaf.c index 90cd6cb..0e5528c 100644 --- a/uct/policy/ucb1amaf.c +++ b/uct/policy/ucb1amaf.c @@ -52,9 +52,11 @@ ucb1rave_descend(struct uct_policy *p, struct tree *tree, struct tree_node *node if (likely(!allow_pass) && unlikely(is_pass(ni->coord))) continue; ni->amaf.value = (float)ni->amaf.wins / ni->amaf.playouts; + ni->prior.value = (float)ni->prior.wins / ni->prior.playouts; float uctp = (parity > 0 ? ni->u.value : 1 - ni->u.value) + sqrt(xpl / ni->u.playouts); float ravep = (parity > 0 ? ni->amaf.value : 1 - ni->amaf.value) + sqrt(xpl_rave / ni->amaf.playouts); - float urgency = ni->u.playouts ? beta * ravep + (1 - beta) * uctp : b->fpu; + float priorp = (parity > 0 ? ni->prior.value : 1- ni->prior.value); + float urgency = ni->u.playouts ? beta * ravep + (1 - beta) * uctp : ni->prior.playouts ? priorp : b->fpu; //fprintf(stderr, "u %f (%d/%d) r %f (%f %d/%d) b %f -> %f\n", uctp, ni->u.wins, ni->u.playouts, ravep, xpl_rave, ni->amaf.wins, ni->amaf.playouts, beta, urgency); if (urgency > best_urgency) { best_urgency = urgency; -- 2.11.4.GIT