From cdc05ff983c1713795a116fd8f7b6647e95ddf6b Mon Sep 17 00:00:00 2001 From: Petr Baudis Date: Sun, 4 Oct 2009 02:38:29 +0200 Subject: [PATCH] UCB1TUNED: Remove bitrotten policy that was never working well --- uct/policy/Makefile | 2 +- uct/policy/ucb1tuned.c | 90 -------------------------------------------------- uct/uct.c | 3 -- 3 files changed, 1 insertion(+), 94 deletions(-) delete mode 100644 uct/policy/ucb1tuned.c diff --git a/uct/policy/Makefile b/uct/policy/Makefile index 0acf99b..6d6639e 100644 --- a/uct/policy/Makefile +++ b/uct/policy/Makefile @@ -1,5 +1,5 @@ INCLUDES=-I../.. -OBJS=ucb1.o ucb1tuned.o ucb1amaf.o +OBJS=ucb1.o ucb1amaf.o all: uctpolicy.a uctpolicy.a: $(OBJS) diff --git a/uct/policy/ucb1tuned.c b/uct/policy/ucb1tuned.c deleted file mode 100644 index 10c55d6..0000000 --- a/uct/policy/ucb1tuned.c +++ /dev/null @@ -1,90 +0,0 @@ -#include -#include -#include -#include -#include - -#include "board.h" -#include "debug.h" -#include "move.h" -#include "uct/internal.h" -#include "uct/tree.h" - -/* This implements the UCB1-TUNED policy. */ - -struct ucb1_policy_tuned { - /* This is what the Modification of UCT with Patterns in Monte Carlo Go - * paper calls 'p'. Original UCB has this on 2, but this seems to - * produce way too wide searches; reduce this to get deeper and - * narrower readouts - try 0.2. */ - float explore_p; - float fpu; -}; - - -struct tree_node *ucb1_choose(struct uct_policy *p, struct tree_node *node, struct board *b, enum stone color); - -struct tree_node * -ucb1tuned_descend(struct uct_policy *p, struct tree *tree, struct tree_node *node, int parity, bool allow_pass) -{ - struct ucb1_policy_tuned *b = p->data; - float xpl = log(node->u.playouts) * b->explore_p; - - struct tree_node *nbest = node->children; - float best_urgency = -9999; - for (struct tree_node *ni = node->children; ni; ni = ni->sibling) { - /* Do not consider passing early. */ - if (likely(!allow_pass) && unlikely(is_pass(ni->coord))) - continue; - float xpl_loc = (ni->u.value - ni->u.value * ni->u.value); - if (tree_parity(tree, parity) < 0) xpl_loc = 1 - xpl_loc; - xpl_loc += sqrt(xpl / ni->u.playouts); - if (xpl_loc > 1.0/4) xpl_loc = 1.0/4; - float urgency = tree_node_get_value(tree, ni, u, parity) + sqrt(xpl * xpl_loc / ni->u.playouts); - if (urgency > best_urgency) { - best_urgency = urgency; - nbest = ni; - } - } - return nbest; -} - -void ucb1_update(struct uct_policy *p, struct tree *tree, struct tree_node *node, enum stone node_color, enum stone player_color, struct playout_amafmap *map, int result); - - -struct uct_policy * -policy_ucb1tuned_init(struct uct *u, char *arg) -{ - struct uct_policy *p = calloc(1, sizeof(*p)); - struct ucb1_policy_tuned *b = calloc(1, sizeof(*b)); - p->uct = u; - p->data = b; - p->descend = ucb1tuned_descend; - p->choose = ucb1_choose; - p->update = ucb1_update; - - b->explore_p = 0.2; - b->fpu = INFINITY; - - if (arg) { - char *optspec, *next = arg; - while (*next) { - optspec = next; - next += strcspn(next, ":"); - if (*next) { *next++ = 0; } else { *next = 0; } - - char *optname = optspec; - char *optval = strchr(optspec, '='); - if (optval) *optval++ = 0; - - if (!strcasecmp(optname, "explore_p")) { - b->explore_p = atof(optval); - } else { - fprintf(stderr, "ucb1tuned: Invalid policy argument %s or missing value\n", optname); - exit(1); - } - } - } - - return p; -} diff --git a/uct/uct.c b/uct/uct.c index 87d93ff..1d26be9 100644 --- a/uct/uct.c +++ b/uct/uct.c @@ -20,7 +20,6 @@ #include "uct/uct.h" struct uct_policy *policy_ucb1_init(struct uct *u, char *arg); -struct uct_policy *policy_ucb1tuned_init(struct uct *u, char *arg); struct uct_policy *policy_ucb1amaf_init(struct uct *u, char *arg); @@ -570,8 +569,6 @@ uct_state_init(char *arg) *policyarg++ = 0; if (!strcasecmp(optval, "ucb1")) { u->policy = policy_ucb1_init(u, policyarg); - } else if (!strcasecmp(optval, "ucb1tuned")) { - u->policy = policy_ucb1tuned_init(u, policyarg); } else if (!strcasecmp(optval, "ucb1amaf")) { u->policy = policy_ucb1amaf_init(u, policyarg); } else { -- 2.11.4.GIT