From 17aa156dff8f18c88bf5908728315646d2aec874 Mon Sep 17 00:00:00 2001
From: Petr Baudis <pasky@ucw.cz>
Date: Wed, 9 Apr 2008 15:06:11 +0200
Subject: [PATCH] UCT: Way to tune different prior heuristics

---
 uct/policy/ucb1.c     | 44 +++++++++++++++++++++++++++-----------------
 uct/policy/ucb1amaf.c | 10 +++++++++-
 2 files changed, 36 insertions(+), 18 deletions(-)

diff --git a/uct/policy/ucb1.c b/uct/policy/ucb1.c
index b3632fe..68bcfdc 100644
--- a/uct/policy/ucb1.c
+++ b/uct/policy/ucb1.c
@@ -24,7 +24,7 @@ struct ucb1_policy {
 	float fpu;
 	/* Equivalent experience for prior knowledge. MoGo paper recommends
 	 * 50 playouts per source. */
-	int eqex;
+	int eqex, gp_eqex, policy_eqex;
 };
 
 
@@ -100,31 +100,33 @@ ucb1_prior(struct uct_policy *p, struct tree *tree, struct tree_node *node, stru
 #endif
 
 	/* Q_{grandparent} */
-	if (node->parent && node->parent->parent && node->parent->parent->parent) {
+	if (pp->gp_eqex && node->parent && node->parent->parent && node->parent->parent->parent) {
 		struct tree_node *gpp = node->parent->parent->parent;
 		for (struct tree_node *ni = gpp->children; ni; ni = ni->sibling) {
 			/* Be careful not to emphasize too random results. */
-			if (ni->coord == node->coord && ni->u.playouts > pp->eqex) {
-				node->prior.playouts += pp->eqex;
-				node->prior.wins += pp->eqex * ni->u.wins / ni->u.playouts;
+			if (ni->coord == node->coord && ni->u.playouts > pp->gp_eqex) {
+				node->prior.playouts += pp->gp_eqex;
+				node->prior.wins += pp->gp_eqex * ni->u.wins / ni->u.playouts;
 				node->hints |= 1;
 			}
 		}
 	}
 
 	/* Q_{playout-policy} */
-	float assess = NAN;
-	struct playout_policy *playout = p->uct->playout;
-	if (playout->assess) {
-		struct move m = { node->coord, color };
-		assess = playout->assess(playout, b, &m);
-	}
-	if (!isnan(assess)) {
-		if (parity < 0)
-			assess = 1 - assess;
-		node->prior.playouts += pp->eqex;
-		node->prior.wins += pp->eqex * assess;
-		node->hints |= 2;
+	if (pp->policy_eqex) {
+		float assess = NAN;
+		struct playout_policy *playout = p->uct->playout;
+		if (playout->assess) {
+			struct move m = { node->coord, color };
+			assess = playout->assess(playout, b, &m);
+		}
+		if (!isnan(assess)) {
+			if (parity < 0)
+				assess = 1 - assess;
+			node->prior.playouts += pp->policy_eqex;
+			node->prior.wins += pp->policy_eqex * assess;
+			node->hints |= 2;
+		}
 	}
 
 	if (node->prior.playouts) {
@@ -163,6 +165,7 @@ policy_ucb1_init(struct uct *u, char *arg)
 
 	b->explore_p = 0.2;
 	b->fpu = INFINITY;
+	b->gp_eqex = b->policy_eqex = -1;
 
 	if (arg) {
 		char *optspec, *next = arg;
@@ -181,6 +184,10 @@ policy_ucb1_init(struct uct *u, char *arg)
 				b->eqex = optval ? atoi(optval) : 50;
 				if (b->eqex)
 					p->prior = ucb1_prior;
+			} else if (!strcasecmp(optname, "prior_gp") && optval) {
+				b->gp_eqex = atoi(optval);
+			} else if (!strcasecmp(optname, "prior_policy") && optval) {
+				b->policy_eqex = atoi(optval);
 			} else if (!strcasecmp(optname, "fpu") && optval) {
 				b->fpu = atof(optval);
 			} else {
@@ -189,5 +196,8 @@ policy_ucb1_init(struct uct *u, char *arg)
 		}
 	}
 
+	if (b->gp_eqex < 0) b->gp_eqex = b->eqex;
+	if (b->policy_eqex < 0) b->policy_eqex = b->eqex;
+
 	return p;
 }
diff --git a/uct/policy/ucb1amaf.c b/uct/policy/ucb1amaf.c
index 40eda28..f9abc7a 100644
--- a/uct/policy/ucb1amaf.c
+++ b/uct/policy/ucb1amaf.c
@@ -24,7 +24,7 @@ struct ucb1_policy {
 	float fpu;
 	/* Equivalent experience for prior knowledge. MoGo paper recommends
 	 * 50 playouts per source. */
-	int eqex;
+	int eqex, gp_eqex, policy_eqex;
 	float explore_p_rave;
 	int equiv_rave;
 	bool rave_prior;
@@ -128,6 +128,7 @@ policy_ucb1amaf_init(struct uct *u, char *arg)
 	b->explore_p_rave = 0.2;
 	b->equiv_rave = 3000;
 	b->fpu = INFINITY;
+	b->gp_eqex = b->policy_eqex = -1;
 
 	if (arg) {
 		char *optspec, *next = arg;
@@ -146,6 +147,10 @@ policy_ucb1amaf_init(struct uct *u, char *arg)
 				b->eqex = optval ? atoi(optval) : 50;
 				if (b->eqex)
 					p->prior = ucb1_prior;
+			} else if (!strcasecmp(optname, "prior_gp") && optval) {
+				b->gp_eqex = atoi(optval);
+			} else if (!strcasecmp(optname, "prior_policy") && optval) {
+				b->policy_eqex = atoi(optval);
 			} else if (!strcasecmp(optname, "fpu") && optval) {
 				b->fpu = atof(optval);
 			} else if (!strcasecmp(optname, "rave")) {
@@ -162,5 +167,8 @@ policy_ucb1amaf_init(struct uct *u, char *arg)
 		}
 	}
 
+	if (b->gp_eqex < 0) b->gp_eqex = b->eqex;
+	if (b->policy_eqex < 0) b->policy_eqex = b->eqex;
+
 	return p;
 }
-- 
2.11.4.GIT