From 4c4a6a8b3f77295d7b4081ffa0514d10a564cae4 Mon Sep 17 00:00:00 2001 From: Petr Baudis Date: Wed, 17 Feb 2010 22:08:36 +0100 Subject: [PATCH] UCT root_heuristic -> local_tree Instead of storing and biasing by tree root stats, try to bias according to whole sequences, broken at tenuki move - effectively, we store statistics of local situation solutions. --- uct/internal.h | 17 ++++---- uct/policy/generic.c | 2 +- uct/policy/generic.h | 34 +++++++++++---- uct/policy/ucb1.c | 4 +- uct/policy/ucb1amaf.c | 31 ++++++++------ uct/tree.c | 73 ++++++++++++++++++++++---------- uct/tree.h | 15 +++++-- uct/uct.c | 15 +++++-- uct/walk.c | 112 +++++++++++++++++++++++++++++++++++++++----------- 9 files changed, 223 insertions(+), 80 deletions(-) diff --git a/uct/internal.h b/uct/internal.h index 3ff02d4..59d8630 100644 --- a/uct/internal.h +++ b/uct/internal.h @@ -7,6 +7,7 @@ #include "move.h" #include "ownermap.h" #include "playout.h" +#include "stats.h" struct tree; struct tree_node; @@ -59,7 +60,8 @@ struct uct { bool val_extra; int random_policy_chance; - int root_heuristic; + int local_tree; + int tenuki_d; char *banner; @@ -85,14 +87,15 @@ bool uct_pass_is_safe(struct uct *u, struct board *b, enum stone color, bool pas /* This is the state used for descending the tree; we use this wrapper * structure in order to be able to easily descend in multiple trees - * in parallel or compute cummulative "path value" throughout the tree - * descent. - * - * XXX: We don't actually do that now, but this infrastructure is vital - * for couple of my research patches that are kept external yet, so I'd - * very much like to keep it here to reduce my diff sizes. --pasky */ + * in parallel (e.g. main tree and local tree) or compute cummulative + * "path value" throughout the tree descent. */ struct uct_descent { + /* Active tree nodes: */ struct tree_node *node; /* Main tree. */ + struct tree_node *lnode; /* Local tree. */ + /* Value of main tree node (with all value factors, but unbiased + * - without exploration factor), from black's perspective. */ + struct move_stats value; }; diff --git a/uct/policy/generic.c b/uct/policy/generic.c index 2610e42..a63e0c1 100644 --- a/uct/policy/generic.c +++ b/uct/policy/generic.c @@ -46,7 +46,7 @@ uctp_generic_winner(struct uct_policy *p, struct tree *tree, struct uct_descent bool allow_pass = false; /* At worst forces some extra playouts at the end */ int parity = ((descent->node->depth ^ tree->root->depth) & 1) ? -1 : 1; - uctd_try_node_children(tree, descent, allow_pass, di, urgency) { + uctd_try_node_children(tree, descent, allow_pass, parity, p->uct->tenuki_d, di, urgency) { urgency = p->evaluate(p, tree, &di, parity); } uctd_set_best_child(di, urgency); diff --git a/uct/policy/generic.h b/uct/policy/generic.h index 6fd43a1..a3312fc 100644 --- a/uct/policy/generic.h +++ b/uct/policy/generic.h @@ -15,17 +15,33 @@ void uctp_generic_winner(struct uct_policy *p, struct tree *tree, struct uct_des /* Some generic stitching for tree descent. */ -#define uctd_try_node_children(tree, descent, allow_pass, di, urgency) \ +#define uctd_try_node_children(tree, descent, allow_pass, parity, tenuki_d, di, urgency) \ + /* Information abound best children. */ \ /* XXX: Stack overflow danger on big boards? */ \ - struct uct_descent dbest[512] = { { .node = descent->node->children } }; int dbests = 1; \ + struct uct_descent dbest[512] = { { .node = descent->node->children, .lnode = NULL } }; int dbests = 1; \ float best_urgency = -9999; \ - struct uct_descent di = { .node = descent->node->children }; \ + /* Descent children iterator. */ \ + struct uct_descent dci = { .node = descent->node->children, .lnode = descent->lnode->children }; \ \ - for (; di.node; di.node = di.node->sibling) { \ + for (; dci.node; dci.node = dci.node->sibling) { \ float urgency; \ /* Do not consider passing early. */ \ - if (unlikely((!allow_pass && is_pass(di.node->coord)) || (di.node->hints & TREE_HINT_INVALID))) \ - continue; + if (unlikely((!allow_pass && is_pass(dci.node->coord)) || (dci.node->hints & TREE_HINT_INVALID))) \ + continue; \ + /* Position dci.lnode to point at or right after the local + * node corresponding to dci.node. */ \ + while (dci.lnode && dci.lnode->coord < dci.node->coord) \ + dci.lnode = dci.lnode->sibling; \ + /* Set up descent-further iterator. This is the public-accessible + * one, and usually is similar to dci. However, in case of local + * trees, we may keep next-candidate pointer in dci while storing + * actual-specimen in di. */ \ + struct uct_descent di = dci; \ + if (dci.lnode) { \ + /* Set lnode to local tree node corresponding + * to node (dci.lnode, pass-lnode or NULL). */ \ + di.lnode = tree_lnode_for_node(tree, dci.node, dci.lnode, tenuki_d); \ + } /* ...your urgency computation code goes here... */ @@ -39,7 +55,11 @@ void uctp_generic_winner(struct uct_policy *p, struct tree *tree, struct uct_des if (dbests == 1 && is_pass(dbest[0].node->coord)) { \ dbests--; \ } \ - dbest[dbests++] = di; \ + struct uct_descent db = di; \ + /* Make sure lnode information is meaningful. */ \ + if (db.lnode && is_pass(db.lnode->coord)) \ + db.lnode = NULL; \ + dbest[dbests++] = db; \ } \ } diff --git a/uct/policy/ucb1.c b/uct/policy/ucb1.c index 74f2468..7bb7245 100644 --- a/uct/policy/ucb1.c +++ b/uct/policy/ucb1.c @@ -38,10 +38,12 @@ ucb1_descend(struct uct_policy *p, struct tree *tree, struct uct_descent *descen struct ucb1_policy *b = p->data; float xpl = log(descent->node->u.playouts + descent->node->prior.playouts); - uctd_try_node_children(tree, descent, allow_pass, di, urgency) { + uctd_try_node_children(tree, descent, allow_pass, parity, p->uct->tenuki_d, di, urgency) { struct tree_node *ni = di.node; int uct_playouts = ni->u.playouts + ni->prior.playouts; + /* XXX: We don't take local-tree information into account. */ + if (uct_playouts) { urgency = (ni->u.playouts * tree_node_get_value(tree, parity, ni->u.value) + ni->prior.playouts * tree_node_get_value(tree, parity, ni->prior.value)) diff --git a/uct/policy/ucb1amaf.c b/uct/policy/ucb1amaf.c index 981e012..0cf2ea3 100644 --- a/uct/policy/ucb1amaf.c +++ b/uct/policy/ucb1amaf.c @@ -28,8 +28,8 @@ struct ucb1_policy_amaf { bool both_colors; bool check_nakade; bool sylvain_rave; - /* Coefficient of root values embedded in RAVE. */ - float root_rave; + /* Coefficient of local tree values embedded in RAVE. */ + float ltree_rave; }; @@ -75,11 +75,13 @@ static inline float fast_sqrt(int x) } } +#define LTREE_DEBUG if (0) static float inline ucb1rave_evaluate(struct uct_policy *p, struct tree *tree, struct uct_descent *descent, int parity) { struct ucb1_policy_amaf *b = p->data; struct tree_node *node = descent->node; + struct tree_node *lnode = descent->lnode; struct move_stats n = node->u, r = node->amaf; if (p->uct->amaf_prior) { @@ -88,13 +90,14 @@ ucb1rave_evaluate(struct uct_policy *p, struct tree *tree, struct uct_descent *d stats_merge(&n, &node->prior); } - /* Root heuristics, if we aren't actually near the root. */ - if (tree->chvals && b->root_rave > 0 && likely(!is_pass(node->coord)) - && node->parent && node->parent->parent && node->parent->parent->parent) { - struct move_stats *rv = parity > 0 ? tree->chvals : tree->chchvals; - struct move_stats root = rv[node->coord]; - root.playouts *= b->root_rave; - stats_merge(&r, &root); + /* Local tree heuristics. */ + if (p->uct->local_tree && b->ltree_rave > 0 && lnode) { + struct move_stats l = lnode->u; + l.playouts *= b->ltree_rave; + LTREE_DEBUG fprintf(stderr, "[ltree] adding [%s] %f%%%d to [%s] RAVE %f%%%d\n", + coord2sstr(lnode->coord, tree->board), l.value, l.playouts, + coord2sstr(node->coord, tree->board), r.value, r.playouts); + stats_merge(&r, &l); } float value = 0; @@ -118,6 +121,8 @@ ucb1rave_evaluate(struct uct_policy *p, struct tree *tree, struct uct_descent *d } else if (r.playouts) { value = r.value; } + descent->value.playouts = r.playouts + n.playouts; + descent->value.value = value; return tree_node_get_value(tree, parity, value); } @@ -129,7 +134,7 @@ ucb1rave_descend(struct uct_policy *p, struct tree *tree, struct uct_descent *de if (b->explore_p > 0) nconf = sqrt(log(descent->node->u.playouts + descent->node->prior.playouts)); - uctd_try_node_children(tree, descent, allow_pass, di, urgency) { + uctd_try_node_children(tree, descent, allow_pass, parity, p->uct->tenuki_d, di, urgency) { struct tree_node *ni = di.node; urgency = ucb1rave_evaluate(p, tree, &di, parity); @@ -241,7 +246,7 @@ policy_ucb1amaf_init(struct uct *u, char *arg) b->fpu = INFINITY; b->check_nakade = true; b->sylvain_rave = true; - b->root_rave = 1.0f; + b->ltree_rave = 1.0f; if (arg) { char *optspec, *next = arg; @@ -266,8 +271,8 @@ policy_ucb1amaf_init(struct uct *u, char *arg) b->sylvain_rave = !optval || *optval == '1'; } else if (!strcasecmp(optname, "check_nakade")) { b->check_nakade = !optval || *optval == '1'; - } else if (!strcasecmp(optname, "root_rave") && optval) { - b->root_rave = atof(optval); + } else if (!strcasecmp(optname, "ltree_rave") && optval) { + b->ltree_rave = atof(optval); } else { fprintf(stderr, "ucb1amaf: Invalid policy argument %s or missing value\n", optname); diff --git a/uct/tree.c b/uct/tree.c index 99e409c..7486ad7 100644 --- a/uct/tree.c +++ b/uct/tree.c @@ -87,6 +87,9 @@ tree_init(struct board *board, enum stone color, unsigned long max_tree_size) t->root = tree_init_node(t, pass, 0); t->root_symmetry = board->symmetry; t->root_color = stone_other(color); // to research black moves, root will be white + + t->ltree_black = tree_init_node(t, pass, 0); + t->ltree_white = tree_init_node(t, pass, 0); return t; } @@ -160,8 +163,8 @@ tree_done_node_detached(struct tree *t, struct tree_node *n) void tree_done(struct tree *t) { - if (t->chchvals) free(t->chchvals); - if (t->chvals) free(t->chvals); + tree_done_node(t, t->ltree_black); + tree_done_node(t, t->ltree_white); if (t->nodes) { free(t->nodes); free(t); @@ -210,18 +213,6 @@ tree_node_dump(struct tree *tree, struct tree_node *node, int l, int thres) } void -tree_dump_chval(struct tree *tree, struct move_stats *v) -{ - for (int y = board_size(tree->board) - 2; y > 1; y--) { - for (int x = 1; x < board_size(tree->board) - 1; x++) { - coord_t c = coord_xy(tree->board, x, y); - fprintf(stderr, "%.2f%%%05d ", v[c].value, v[c].playouts); - } - fprintf(stderr, "\n"); - } -} - -void tree_dump(struct tree *tree, int thres) { if (thres && tree->root->u.playouts / thres > 100) { @@ -233,11 +224,11 @@ tree_dump(struct tree *tree, int thres) stone2str(tree->root_color), tree->extra_komi); tree_node_dump(tree, tree->root, 0, thres); - if (DEBUGL(3) && tree->chvals) { - fprintf(stderr, "children stats:\n"); - tree_dump_chval(tree, tree->chvals); - fprintf(stderr, "grandchildren stats:\n"); - tree_dump_chval(tree, tree->chchvals); + if (DEBUGL(3) && tree->ltree_black) { + fprintf(stderr, "B local tree:\n"); + tree_node_dump(tree, tree->ltree_black, 0, thres); + fprintf(stderr, "W local tree:\n"); + tree_node_dump(tree, tree->ltree_white, 0, thres); } } @@ -630,6 +621,47 @@ tree_get_node(struct tree *t, struct tree_node *parent, coord_t c, bool create) return nn; } +/* Get local tree node corresponding to given node, given local node child + * iterator @lni (which points either at the corresponding node, or at the + * nearest local tree node after @ni). */ +struct tree_node * +tree_lnode_for_node(struct tree *tree, struct tree_node *ni, struct tree_node *lni, int tenuki_d) +{ + /* Now set up lnode, which is the actual local node + * corresponding to ni - either lni if it is an + * exact match and ni is not tenuki, local + * node if ni is tenuki, or NULL if there is no + * corresponding node available. */ + + if (is_pass(ni->coord)) { + /* Also, for sanity reasons we never use local + * tree for passes. (Maybe we could, but it's + * too hard to think about.) */ + return NULL; + } + + if (lni->coord == ni->coord) { + /* We don't consider tenuki a sequence play + * that we have in local tree even though + * ni->d is too high; this can happen if this + * occured in different board topology. */ + return lni; + } + + if (ni->d >= tenuki_d) { + /* Tenuki, pick a pass lsibling if available. */ + assert(lni->parent && lni->parent->children); + if (is_pass(lni->parent->children->coord)) { + return lni->parent->children; + } else { + return NULL; + } + } + + /* No corresponding local node, lnode stays NULL. */ + return NULL; +} + /* Tree symmetry: When possible, we will localize the tree to a single part * of the board in tree_expand_node() and possibly flip along symmetry axes @@ -829,12 +861,11 @@ tree_promote_node(struct tree *tree, struct tree_node **node) tree->root = *node; tree->root_color = stone_other(tree->root_color); board_symmetry_update(tree->board, &tree->root_symmetry, (*node)->coord); + /* If the tree deepest node was under node, or if we called tree_garbage_collect, * tree->max_depth is correct. Otherwise we could traverse the tree * to recompute max_depth but it's not worth it: it's just for debugging * and soon the tree will grow and max_depth will become correct again. */ - if (tree->chchvals) { free(tree->chchvals); tree->chchvals = NULL; } - if (tree->chvals) { free(tree->chvals); tree->chvals = NULL; } } bool diff --git a/uct/tree.h b/uct/tree.h index 40c7a9f..65dcd22 100644 --- a/uct/tree.h +++ b/uct/tree.h @@ -89,9 +89,17 @@ struct tree { bool use_extra_komi; float extra_komi; - // Summary statistics of good black, white moves in the tree - struct move_stats *chvals; // [bsize2] root children - struct move_stats *chchvals; // [bsize2] root children's children + /* We merge local (non-tenuki) sequences for both colors, occuring + * anywhere in the tree; nodes are created on-demand, special 'pass' + * nodes represent tenuki. Only u move_stats are used, prior and amaf + * is ignored. Values in root node are ignored. */ + /* The values in the tree can be either "raw" or "tempered" + * (representing difference against parent node in the main tree), + * controlled by local_tree setting. */ + struct tree_node *ltree_black; + // Of course even in white tree, winrates are from b's perspective + // as anywhere else. ltree_white has white-first sequences as children. + struct tree_node *ltree_white; // Statistics int max_depth; @@ -111,6 +119,7 @@ void tree_merge(struct tree *dest, struct tree *src); void tree_normalize(struct tree *tree, int factor); void tree_expand_node(struct tree *tree, struct tree_node *node, struct board *b, enum stone color, struct uct *u, int parity); +struct tree_node *tree_lnode_for_node(struct tree *tree, struct tree_node *ni, struct tree_node *lni, int tenuki_d); /* Warning: All these functions are THREAD-UNSAFE! */ struct tree_node *tree_get_node(struct tree *tree, struct tree_node *node, coord_t c, bool create); diff --git a/uct/uct.c b/uct/uct.c index 4b830cc..c9376ee 100644 --- a/uct/uct.c +++ b/uct/uct.c @@ -839,6 +839,8 @@ uct_state_init(char *arg, struct board *b) u->val_scale = 0.04; u->val_points = 40; + u->tenuki_d = 4; + if (arg) { char *optspec, *next = arg; while (*next) { @@ -1016,8 +1018,8 @@ uct_state_init(char *arg, struct board *b) * added to the value, instead of scaling the result * coefficient because of it. */ u->val_extra = !optval || atoi(optval); - } else if (!strcasecmp(optname, "root_heuristic") && optval) { - /* Whether to bias exploration by root node values + } else if (!strcasecmp(optname, "local_tree") && optval) { + /* Whether to bias exploration by local tree values * (must be supported by the used policy). * 0: Don't. * 1: Do, value = result. @@ -1025,7 +1027,14 @@ uct_state_init(char *arg, struct board *b) * 2: Do, value = 0.5+(result-expected)/2. * 3: Do, value = 0.5+bzz((result-expected)^2). * 4: Do, value = 0.5+sqrt(result-expected)/2. */ - u->root_heuristic = atoi(optval); + u->local_tree = atoi(optval); + } else if (!strcasecmp(optname, "tenuki_d") && optval) { + /* Tenuki distance at which to break the local tree. */ + u->tenuki_d = atoi(optval); + if (u->tenuki_d > TREE_NODE_D_MAX + 1) { + fprintf(stderr, "uct: tenuki_d must not be larger than TREE_NODE_D_MAX+1 %d\n", TREE_NODE_D_MAX + 1); + exit(1); + } } else if (!strcasecmp(optname, "pass_all_alive")) { /* Whether to consider all stones alive at the game * end instead of marking dead groupd. */ diff --git a/uct/walk.c b/uct/walk.c index 0004de4..981e7c8 100644 --- a/uct/walk.c +++ b/uct/walk.c @@ -153,6 +153,45 @@ scale_value(struct uct *u, struct board *b, int result) return rval; } +static void +record_local_sequence(struct uct *u, struct tree *t, + struct uct_descent *descent, int dlen, int di, + enum stone seq_color, float rval) +{ + /* Ignore pass sequences. */ + if (is_pass(descent[di].node->coord)) + return; + +#define LTREE_DEBUG if (UDEBUGL(6)) + LTREE_DEBUG fprintf(stderr, "recording result %f in local %s sequence: ", + rval, stone2str(seq_color)); + int di0 = di; + + /* Pick the right local tree root... */ + struct tree_node *lnode = seq_color == S_BLACK ? t->ltree_black : t->ltree_white; + lnode->u.playouts++; + + /* ...and record the sequence. */ + while (di < dlen && (di == di0 || descent[di].node->d < u->tenuki_d)) { + LTREE_DEBUG fprintf(stderr, "%s[%d] ", + coord2sstr(descent[di].node->coord, t->board), + descent[di].node->d); + lnode = tree_get_node(t, lnode, descent[di++].node->coord, true); + assert(lnode); + stats_add_result(&lnode->u, rval, 1); + } + + /* Add lnode for tenuki (pass) if we descended further. */ + if (di < dlen) { + LTREE_DEBUG fprintf(stderr, "pass "); + lnode = tree_get_node(t, lnode, pass, true); + assert(lnode); + stats_add_result(&lnode->u, rval, 1); + } + + LTREE_DEBUG fprintf(stderr, "\n"); +} + int uct_playout(struct uct *u, struct board *b, enum stone player_color, struct tree *t) @@ -173,7 +212,15 @@ uct_playout(struct uct *u, struct board *b, enum stone player_color, struct tree enum stone node_color = stone_other(player_color); assert(node_color == t->root_color); - struct uct_descent descent = {.node = n }; + /* Tree descent history. */ + /* XXX: This is somewhat messy since @n and descent[dlen-1].node are + * redundant. */ + #define DLEN 512 + struct uct_descent descent[DLEN]; + descent[0].node = n; descent[0].lnode = NULL; + int dlen = 1; + /* Total value of the sequence. */ + struct move_stats seq_value = { .playouts = 0 }; int result; int pass_limit = (board_size(&b2) - 2) * (board_size(&b2) - 2) / 2; @@ -197,15 +244,26 @@ uct_playout(struct uct *u, struct board *b, enum stone player_color, struct tree node_color = stone_other(node_color); int parity = (node_color == player_color ? 1 : -1); + assert(dlen < DLEN); + descent[dlen] = descent[dlen - 1]; + if (!descent[dlen].lnode || descent[dlen].node->d >= u->tenuki_d) { + /* Start new local sequence. */ + /* Remember that node_color already holds color of the + * to-be-found child. */ + descent[dlen].lnode = node_color == S_BLACK ? t->ltree_black : t->ltree_white; + } + if (!u->random_policy_chance || fast_random(u->random_policy_chance)) - u->policy->descend(u->policy, t, &descent, parity, b2.moves > pass_limit); + u->policy->descend(u->policy, t, &descent[dlen], parity, b2.moves > pass_limit); else - u->random_policy->descend(u->random_policy, t, &descent, parity, b2.moves > pass_limit); + u->random_policy->descend(u->random_policy, t, &descent[dlen], parity, b2.moves > pass_limit); /*** Perform the descent: */ - n = descent.node; + seq_value.playouts += descent[dlen].value.playouts; + seq_value.value += descent[dlen].value.value * descent[dlen].value.playouts; + n = descent[dlen++].node; assert(n == t->root || n->parent); if (UDEBUGL(7)) fprintf(stderr, "%s+-- UCT sent us to [%s:%d] %f\n", @@ -307,27 +365,33 @@ uct_playout(struct uct *u, struct board *b, enum stone player_color, struct tree u->policy->update(u->policy, t, n, node_color, player_color, amaf, rval); - if (u->root_heuristic && n->parent) { - if (!t->chvals) { - t->chvals = calloc(board_size2(b), sizeof(t->chvals[0])); - t->chchvals = calloc(board_size2(b), sizeof(t->chchvals[0])); - } - + if (u->local_tree && n->parent && !is_pass(n->coord) && dlen > 0) { /* Possibly transform the rval appropriately. */ - rval = stats_temper_value(rval, n->parent->u.value, u->root_heuristic); - - struct tree_node *ni = n; - while (ni->parent->parent && ni->parent->parent->parent) - ni = ni->parent; - if (ni->parent->parent) { - if (likely(!is_pass(ni->coord))) - stats_add_result(&t->chchvals[ni->coord], rval, 1); - ni = ni->parent; - } - assert(ni->parent && !ni->parent->parent); - - if (likely(!is_pass(ni->coord))) - stats_add_result(&t->chvals[ni->coord], rval, 1); + float expval = seq_value.value / seq_value.playouts; + rval = stats_temper_value(rval, expval, u->local_tree); + + /* Get the local sequences and record them in ltree. */ + /* We will look for sequence starts in our descent + * history, then run record_local_sequence() for each + * found sequence start; record_local_sequence() may + * pick longer sequences from descent history then, + * which is expected as it will create new lnodes. */ + enum stone seq_color = player_color; + /* First move always starts a sequence. */ + record_local_sequence(u, t, descent, dlen, 1, seq_color, rval); + seq_color = stone_other(seq_color); + for (int dseqi = 2; dseqi < dlen; dseqi++, seq_color = stone_other(seq_color)) { + if (descent[dseqi].node->d >= u->tenuki_d) { + /* Tenuki! Record the fresh sequence. */ + record_local_sequence(u, t, descent, dlen, dseqi, seq_color, rval); + continue; + } + if (descent[dseqi].lnode && !descent[dseqi].lnode) { + /* Record result for in-descent picked sequence. */ + record_local_sequence(u, t, descent, dlen, dseqi, seq_color, rval); + continue; + } + } } } -- 2.11.4.GIT