From 48007c59ca8573f13bbce66797c2e4330291ba11 Mon Sep 17 00:00:00 2001 From: Petr Baudis Date: Mon, 3 Jan 2011 03:10:00 +0100 Subject: [PATCH] UCT: Progressive Unpruning support (for all policies, tunable) --- uct/internal.h | 5 +++++ uct/policy/generic.h | 18 ++++++++++++++++++ uct/tree.c | 26 ++++++++++++++++++++++++-- uct/tree.h | 6 +++++- uct/uct.c | 16 ++++++++++++++++ 5 files changed, 68 insertions(+), 3 deletions(-) diff --git a/uct/internal.h b/uct/internal.h index 0afe868..a70f65f 100644 --- a/uct/internal.h +++ b/uct/internal.h @@ -90,6 +90,11 @@ struct uct { bool local_tree_playout; // can be true only if ELO playout bool local_tree_pseqroot; + /* Progressive unpruning */ + int prune_initial; + int prune_playouts; + floating_t prune_speedup; // log() of user input + char *banner; struct uct_policy *policy; diff --git a/uct/policy/generic.h b/uct/policy/generic.h index d901de7..61b0f84 100644 --- a/uct/policy/generic.h +++ b/uct/policy/generic.h @@ -12,6 +12,7 @@ struct tree_node; struct tree_node *uctp_generic_choose(struct uct_policy *p, struct tree_node *node, struct board *b, enum stone color, coord_t exclude); void uctp_generic_winner(struct uct_policy *p, struct tree *tree, struct uct_descent *descent); +static int uct_get_prune_level(struct uct *u, struct tree_node *node); /* Some generic stitching for tree descent. */ @@ -21,6 +22,7 @@ void uctp_generic_winner(struct uct_policy *p, struct tree *tree, struct uct_des /* XXX: We assume board <=25x25. */ \ struct uct_descent dbest[BOARD_MAX_MOVES + 1] = { { .node = descent->node->children, .lnode = NULL } }; int dbests = 1; \ floating_t best_urgency = -9999; \ + int prune_level = uct_get_prune_level(policy->uct, descent->node); \ /* Descent children iterator. */ \ struct uct_descent dci = { .node = descent->node->children, .lnode = descent->lnode ? descent->lnode->children : NULL }; \ \ @@ -29,6 +31,9 @@ void uctp_generic_winner(struct uct_policy *p, struct tree *tree, struct uct_des /* Do not consider passing early. */ \ if (unlikely((!allow_pass && is_pass(dci.node->coord)) || (dci.node->hints & TREE_HINT_INVALID))) \ continue; \ + /* Skip temporarily pruned moves. */ \ + if (dci.node->prune_rank >= prune_level) \ + continue; \ /* Position dci.lnode to point at or right after the local * node corresponding to dci.node. */ \ while (dci.lnode && dci.lnode->coord < dci.node->coord) \ @@ -67,4 +72,17 @@ void uctp_generic_winner(struct uct_policy *p, struct tree *tree, struct uct_des #define uctd_get_best_child(descent) *(descent) = dbest[fast_random(dbests)]; +static inline int +uct_get_prune_level(struct uct *u, struct tree_node *node) +{ + int level = u->prune_initial; + if (node->u.playouts < u->prune_playouts) + return level; + /* Chaslot et al: PROGRESSIVE STRATEGIES FOR MONTE-CARLO TREE SEARCH + * k nodes are unpruned when the number of simulations in the parent + * surpasses A * B^(k-kinit) simulated games. */ + level += (floating_t) log(node->u.playouts / u->prune_playouts) / u->prune_speedup; + return level; +} + #endif diff --git a/uct/tree.c b/uct/tree.c index 797065c..168f497 100644 --- a/uct/tree.c +++ b/uct/tree.c @@ -195,12 +195,12 @@ tree_node_dump(struct tree *tree, struct tree_node *node, int l, int thres) children++; /* We use 1 as parity, since for all nodes we want to know the * win probability of _us_, not the node color. */ - fprintf(stderr, "[%s] %f %% %d [prior %f %% %d amaf %f %% %d]; hints %x; %d children <%"PRIhash">\n", + fprintf(stderr, "[%s] %f %% %d [prior %f %% %d amaf %f %% %d]; hints %x; prune %d; %d children <%"PRIhash">\n", coord2sstr(node->coord, tree->board), tree_node_get_value(tree, 1, node->u.value), node->u.playouts, tree_node_get_value(tree, 1, node->prior.value), node->prior.playouts, tree_node_get_value(tree, 1, node->amaf.value), node->amaf.playouts, - node->hints, children, node->hash); + node->hints, node->prune_rank, children, node->hash); /* Print nodes sorted by #playouts. */ @@ -603,6 +603,7 @@ tree_expand_node(struct tree *t, struct tree_node *node, struct board *b, enum s uct_prior(u, node, &map); /* Now, create the nodes. */ + floating_t priors[board_size2(b)]; int priors_n = 0; struct tree_node *ni = tree_init_node(t, pass, node->depth + 1, t->nodes); /* In fast_alloc mode we might temporarily run out of nodes but this should be rare. */ if (!ni) { @@ -612,6 +613,7 @@ tree_expand_node(struct tree *t, struct tree_node *node, struct board *b, enum s struct tree_node *first_child = ni; ni->parent = node; ni->prior = map.prior[pass]; ni->d = TREE_NODE_D_MAX + 1; + priors[priors_n++] = map.parity > 0 ? ni->prior.value : 1 - ni->prior.value; /* The loop considers only the symmetry playground. */ if (UDEBUGL(6)) { @@ -645,10 +647,30 @@ tree_expand_node(struct tree *t, struct tree_node *node, struct board *b, enum s nj->parent = node; ni->sibling = nj; ni = nj; ni->prior = map.prior[c]; + priors[priors_n++] = map.parity > 0 ? ni->prior.value : 1 - ni->prior.value; ni->d = distances[c]; } } node->children = first_child; // must be done at the end to avoid race + + /* Now, rank the nodes by their prior value. */ + /* O(N^2) bubblesort, lalala */ + int shuf[priors_n]; // permutation info + for (int i = 0; i < priors_n; i++) + shuf[i] = i; + for (int i = 0; i < priors_n; i++) + for (int j = i + 1; j < priors_n; j++) + if (priors[shuf[i]] < priors[shuf[j]]) { + int swap = shuf[i]; + shuf[i] = shuf[j]; + shuf[j] = swap; + } + int ranks[priors_n]; + for (int i = 0; i < priors_n; i++) + ranks[shuf[i]] = i; + int i = 0; + for (struct tree_node *ni = node->children; ni; ni = ni->sibling, i++) + ni->prune_rank = ranks[i]; } diff --git a/uct/tree.h b/uct/tree.h index 501de11..eec68f6 100644 --- a/uct/tree.h +++ b/uct/tree.h @@ -66,6 +66,9 @@ struct tree_node { #define TREE_NODE_D_MAX 3 unsigned char d; + /* Position in progressive unpruning sequence */ + unsigned short prune_rank; + #define TREE_HINT_INVALID 1 // don't go to this node, invalid move unsigned char hints; @@ -136,7 +139,8 @@ struct tree { /* Warning: all functions below except tree_expand_node & tree_leaf_node are THREAD-UNSAFE! */ struct tree *tree_init(struct board *board, enum stone color, unsigned long max_tree_size, - unsigned long max_pruned_size, unsigned long pruning_threshold, floating_t ltree_aging, int hbits); + unsigned long max_pruned_size, unsigned long pruning_threshold, + floating_t ltree_aging, int hbits); void tree_done(struct tree *tree); void tree_dump(struct tree *tree, int thres); void tree_save(struct tree *tree, struct board *b, int thres); diff --git a/uct/uct.c b/uct/uct.c index 49820b0..024dfba 100644 --- a/uct/uct.c +++ b/uct/uct.c @@ -555,6 +555,10 @@ uct_state_init(char *arg, struct board *b) u->tenuki_d = 4; u->local_tree_aging = 2; + u->prune_initial = 5; + u->prune_playouts = 40; + u->prune_speedup = log(1.3); + u->plugins = pluginset_init(b); u->jdict = joseki_load(b->size); @@ -863,6 +867,18 @@ uct_state_init(char *arg, struct board *b) * coefficient because of it. */ u->val_extra = !optval || atoi(optval); + /* Progressive Unpruning */ + + } else if (!strcasecmp(optname, "prune_initial") && optval) { + /* Initial number of unpruned moves. */ + u->prune_playouts = atoi(optval); + } else if (!strcasecmp(optname, "prune_playouts") && optval) { + /* A term in the progressive unpruning equation. */ + u->prune_playouts = atoi(optval); + } else if (!strcasecmp(optname, "prune_speedup") && optval) { + /* B term in the progressive unpruning equation. */ + u->prune_speedup = log(atof(optval)); + /** Local trees */ /* (Purely experimental. Does not work - yet!) */ -- 2.11.4.GIT