From 1af8b42ea030b0c5fa3e3360ae8b35e461c49fc7 Mon Sep 17 00:00:00 2001 From: Petr Baudis Date: Thu, 24 Sep 2009 04:19:41 +0200 Subject: [PATCH] UCB1: Return to old pass choosing behaviour The negative prior was causing big problems at post-endgame stages, with pass constantly being pushed away. --- uct/policy/ucb1.c | 5 ----- uct/policy/ucb1amaf.c | 11 +++++++++-- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/uct/policy/ucb1.c b/uct/policy/ucb1.c index 7a59342..710a56d 100644 --- a/uct/policy/ucb1.c +++ b/uct/policy/ucb1.c @@ -98,11 +98,6 @@ ucb1_prior(struct uct_policy *p, struct tree *tree, struct tree_node *node, stru /* Initialization of UCT values based on prior knowledge */ struct ucb1_policy *pp = p->data; - if (is_pass(node->coord)) { - /* Discourage passing */ - node->prior.playouts += 10; - node->prior.wins += parity > 0 ? 0 : 10; - } else /* Q_{even} */ /* This may be dubious for normal UCB1 but is essential for * reading stability of RAVE, it appears. */ diff --git a/uct/policy/ucb1amaf.c b/uct/policy/ucb1amaf.c index f9e6974..eb4f0e0 100644 --- a/uct/policy/ucb1amaf.c +++ b/uct/policy/ucb1amaf.c @@ -270,10 +270,17 @@ ucb1srave_descend(struct uct_policy *p, struct tree *tree, struct tree_node *nod if (b->urg_randomm) urgency *= (float)(fast_random(b->urg_randomm) + 5) / b->urg_randomm; - if (urgency > best_urgency) + if (urgency > best_urgency) { best_urgency = urgency; nbests = 0; - if (urgency >= best_urgency) + } + if (urgency >= best_urgency) { + /* We want to always choose something else than a pass + * in case of a tie. pass causes degenerative behaviour. */ + if (nbests == 1 && is_pass(nbest[0]->coord)) { + nbests--; + } nbest[nbests++] = ni; + } } #if 0 struct board bb; bb.size = 11; -- 2.11.4.GIT