From 6771b36cf0ddd2b0cca0d9016b2a34130ab2d682 Mon Sep 17 00:00:00 2001 From: Jean-loup Gailly Date: Thu, 4 Feb 2010 17:05:45 +0100 Subject: [PATCH] New optional fast_alloc mode, allocating all nodes at once. See tree.h for overview. --- uct/internal.h | 1 + uct/tree.c | 194 +++++++++++++++++++++++++++++++++++++++++++++++++-------- uct/tree.h | 32 +++++++++- uct/uct.c | 12 +++- uct/walk.c | 6 +- 5 files changed, 212 insertions(+), 33 deletions(-) diff --git a/uct/internal.h b/uct/internal.h index 74d9979..e4ed3ba 100644 --- a/uct/internal.h +++ b/uct/internal.h @@ -30,6 +30,7 @@ struct uct { int dumpthres; int force_seed; bool no_book; + bool fast_alloc; unsigned long max_tree_size; int mercymin; diff --git a/uct/tree.c b/uct/tree.c index 94df30c..1a7f19d 100644 --- a/uct/tree.c +++ b/uct/tree.c @@ -18,16 +18,40 @@ #include "uct/tree.h" -/* This function may be called by multiple threads in parallel */ +/* Allocate one node in the fast_alloc mode. The returned node + * is _not_ initialized. Returns NULL if not enough memory. + * This function may be called by multiple threads in parallel. */ +static struct tree_node * +tree_fast_alloc_node(struct tree *t) +{ + assert(t->nodes != NULL); + struct tree_node *n = NULL; + unsigned long old_size =__sync_fetch_and_add(&t->nodes_size, sizeof(*n)); + + if (old_size + sizeof(*n) <= t->max_tree_size) + n = (struct tree_node *)(t->nodes + old_size); + return n; +} + +/* Allocate and initialize a node. Returns NULL (fast_alloc mode) + * or exits the main program if not enough memory. + * This function may be called by multiple threads in parallel. */ static struct tree_node * tree_init_node(struct tree *t, coord_t coord, int depth) { - struct tree_node *n = calloc(1, sizeof(*n)); - if (!n) { - fprintf(stderr, "tree_init_node(): OUT OF MEMORY\n"); - exit(1); + struct tree_node *n; + if (t->nodes) { + n = tree_fast_alloc_node(t); + if (!n) return n; + memset(n, 0, sizeof(*n)); + } else { + n = calloc(1, sizeof(*n)); + if (!n) { + fprintf(stderr, "tree_init_node(): OUT OF MEMORY\n"); + exit(1); + } + __sync_fetch_and_add(&t->nodes_size, sizeof(*n)); } - __sync_fetch_and_add(&t->nodes_size, sizeof(*n)); n->coord = coord; n->depth = depth; volatile static long c = 1000000; @@ -37,11 +61,24 @@ tree_init_node(struct tree *t, coord_t coord, int depth) return n; } +/* Create a tree structure. Pre-allocate all nodes if max_tree_size is > 0. */ struct tree * -tree_init(struct board *board, enum stone color) +tree_init(struct board *board, enum stone color, unsigned long max_tree_size) { struct tree *t = calloc(1, sizeof(*t)); t->board = board; + t->max_tree_size = max_tree_size; + if (max_tree_size != 0) { + /* Allocate one extra node, max_tree_size may not be multiple of node size. */ + t->nodes = malloc(max_tree_size + sizeof(struct tree_node)); + /* The nodes buffer doesn't need initialization. This is currently + * done by tree_init_node to spread the load. Doing a memset for the + * entire buffer here would be too slow for large trees (>10 GB). */ + if (!t->nodes) { + fprintf(stderr, "tree_init(): OUT OF MEMORY\n"); + exit(1); + } + } /* The root PASS move is only virtual, we never play it. */ t->root = tree_init_node(t, pass, 0); t->root_symmetry = board->symmetry; @@ -73,7 +110,7 @@ struct subtree_ctx { struct tree_node *n; }; -/* Worker thread for tree_done_node_detached() */ +/* Worker thread for tree_done_node_detached(). Only for fast_alloc=false. */ static void * tree_done_node_worker(void *ctx_) { @@ -83,7 +120,7 @@ tree_done_node_worker(void *ctx_) unsigned long tree_size = tree_done_node(ctx->t, ctx->n); if (!tree_size) free(ctx->t); - if (DEBUGL(0)) // jlg: 0->3 + if (DEBUGL(2)) fprintf(stderr, "done freeing node at %s, tree size %lu\n", str, tree_size); free(str); free(ctx); @@ -91,7 +128,7 @@ tree_done_node_worker(void *ctx_) } /* Asynchronously free the subtree of nodes rooted at n. If the tree becomes - * empty free the tree also. */ + * empty free the tree also. Only for fast_alloc=false. */ static void tree_done_node_detached(struct tree *t, struct tree_node *n) { @@ -121,11 +158,15 @@ tree_done(struct tree *t) { if (t->chchvals) free(t->chchvals); if (t->chvals) free(t->chvals); - if (!tree_done_node(t, t->root)) - free(t); - /* A tree_done_node_worker might still be running on this tree but - * it will free the tree later. It is also freeing nodes faster than - * we will create new ones. */ + if (t->nodes) { + free(t->nodes); + free(t); + } else if (!tree_done_node(t, t->root)) { + free(t); + /* A tree_done_node_worker might still be running on this tree but + * it will free the tree later. It is also freeing nodes faster than + * we will create new ones. */ + } } @@ -319,12 +360,89 @@ tree_node_copy(struct tree_node *node) struct tree * tree_copy(struct tree *tree) { + assert(!tree->nodes); struct tree *t2 = malloc(sizeof(*t2)); *t2 = *tree; t2->root = tree_node_copy(tree->root); return t2; } +/* Copy the subtree rooted at node, discarding nodes with less + * than min_playouts or more than max_playouts, until dest is + * full or we have copied all the subtree. Only for fast_alloc. + * The code is destructive on src, and the order of nodes is changed. + * Returns the copy of node in the destination tree, or NULL + * if we could not copy it. */ +static struct tree_node * +tree_prune(struct tree *dest, struct tree *src, struct tree_node *node, + int min_playouts, int max_playouts) +{ + assert(dest->nodes && node); + if (node->u.playouts < min_playouts || dest->nodes_size >= dest->max_tree_size) + return NULL; + struct tree_node *n2; + if (node->u.playouts > max_playouts) { + /* Node already copied but we must recurse on the children. + * Here node->parent is the node copy. */ + n2 = node->parent; + assert(n2 && n2->hash == node->hash); + } else { + n2 = tree_fast_alloc_node(dest); + assert(n2); + *n2 = *node; + if (n2->depth > dest->max_depth) + dest->max_depth = n2->depth; + n2->children = NULL; + n2->is_expanded = false; + // Misuse the parent field to remember the copy for future passses: + node->parent = n2; + } + struct tree_node *ni = node->children; + while (ni) { + struct tree_node *ni2 = tree_prune(dest, src, ni, min_playouts, max_playouts); + if (ni2 && ni2->u.playouts <= max_playouts) { + ni2->sibling = n2->children; + n2->children = ni2; + n2->is_expanded = true; + ni2->parent = n2; + } + ni = ni->sibling; + } + return n2; +} + +/* Free all the tree, keeping only the subtree rooted at node. + * Prune the subtree if necessary to fit in max_size bytes. + * Returns the moved node. Only for fast_alloc. */ +static struct tree_node * +tree_garbage_collect(struct tree *tree, unsigned long max_size, struct tree_node *node) +{ + assert(tree->nodes && !node->parent && !node->sibling); + struct tree *temp_tree = tree_init(tree->board, tree->root_color, max_size); + struct tree_node *temp_node; + /* Copy the best (most played) nodes first. */ + int min_playouts = node->u.playouts; + int max_playouts = min_playouts; + do { + temp_node = tree_prune(temp_tree, tree, node, min_playouts, max_playouts); + max_playouts = min_playouts - 1; + min_playouts /= 2; + } while (max_playouts >= 0 && temp_tree->nodes_size < temp_tree->max_tree_size); + + if (DEBUGL(1)) + fprintf(stderr, "tree pruned, max_size %lu, pruned size %lu, min_playouts %d\n", + max_size, temp_tree->nodes_size, max_playouts+1); + + /* Now copy back to original tree. */ + tree->nodes_size = 0; + tree->max_depth = 0; + assert(temp_node); + struct tree_node *new_node = tree_prune(tree, temp_tree, temp_node, 0, temp_node->u.playouts); + tree_done(temp_tree); + return new_node; +} + + static void tree_node_merge(struct tree_node *dest, struct tree_node *src) { @@ -391,6 +509,9 @@ next_di: void tree_merge(struct tree *dest, struct tree *src) { + /* Not suitable for fast_alloc which reorders children. */ + assert(!dest->nodes); + if (src->max_depth > dest->max_depth) dest->max_depth = src->max_depth; tree_node_merge(dest->root, src->root); @@ -430,6 +551,7 @@ tree_normalize(struct tree *tree, int factor) * guidelines here. */ +/* This function must be thread safe, given that board b is only modified by the calling thread. */ void tree_expand_node(struct tree *t, struct tree_node *node, struct board *b, enum stone color, struct uct *u, int parity) { @@ -468,6 +590,12 @@ tree_expand_node(struct tree *t, struct tree_node *node, struct board *b, enum s /* Now, create the nodes. */ struct tree_node *ni = tree_init_node(t, pass, node->depth + 1); + /* In fast_alloc mode we might temporarily run out of nodes but + * this should be rare if MIN_FREE_MEM_PERCENT is set correctly. */ + if (!ni) { + node->is_expanded = false; + return; + } struct tree_node *first_child = ni; ni->parent = node; ni->prior = map.prior[pass]; ni->d = TREE_NODE_D_MAX + 1; @@ -497,6 +625,10 @@ tree_expand_node(struct tree *t, struct tree_node *node, struct board *b, enum s assert(c != node->coord); // I have spotted "C3 C3" in some sequence... struct tree_node *nj = tree_init_node(t, c, node->depth + 1); + if (!nj) { + node->is_expanded = false; + return; + } nj->parent = node; ni->sibling = nj; ni = nj; ni->prior = map.prior[c]; @@ -591,18 +723,30 @@ tree_unlink_node(struct tree_node *node) node->parent = NULL; } +/* Promotes the given node as the root of the tree. In the fast_alloc + * mode, the node may be moved and some of its subtree may be pruned. */ void -tree_promote_node(struct tree *tree, struct tree_node *node) +tree_promote_node(struct tree *tree, struct tree_node **node) { - assert(node->parent == tree->root); - tree_unlink_node(node); - /* Freeing the rest of the tree can take several seconds on large - * trees, so we must do it asynchronously: */ - tree_done_node_detached(tree, tree->root); - tree->root = node; + assert((*node)->parent == tree->root); + tree_unlink_node(*node); + if (!tree->nodes) { + /* Freeing the rest of the tree can take several seconds on large + * trees, so we must do it asynchronously: */ + tree_done_node_detached(tree, tree->root); + } else { + unsigned long min_free_size = (MIN_FREE_MEM_PERCENT * tree->max_tree_size) / 100; + if (tree->nodes_size >= tree->max_tree_size - min_free_size) + *node = tree_garbage_collect(tree, min_free_size, *node); + /* If we still have enough free memory, we will free everything later. */ + } + tree->root = *node; tree->root_color = stone_other(tree->root_color); - board_symmetry_update(tree->board, &tree->root_symmetry, node->coord); - tree->max_depth--; + board_symmetry_update(tree->board, &tree->root_symmetry, (*node)->coord); + /* If the tree deepest node was under node, or if we called tree_garbage_collect, + * tree->max_depth is correct. Otherwise we could traverse the tree + * to recompute max_depth but it's not worth it: it's just for debugging + * and soon the tree will grow and max_depth will become correct again. */ if (tree->chchvals) { free(tree->chchvals); tree->chchvals = NULL; } if (tree->chvals) { free(tree->chvals); tree->chvals = NULL; } } @@ -614,7 +758,7 @@ tree_promote_at(struct tree *tree, struct board *b, coord_t c) for (struct tree_node *ni = tree->root->children; ni; ni = ni->sibling) { if (ni->coord == c) { - tree_promote_node(tree, ni); + tree_promote_node(tree, &ni); return true; } } diff --git a/uct/tree.h b/uct/tree.h index 2320235..c14cba1 100644 --- a/uct/tree.h +++ b/uct/tree.h @@ -1,6 +1,30 @@ #ifndef ZZGO_UCT_TREE_H #define ZZGO_UCT_TREE_H +/* Management of UCT trees. See diagram below for the node structure. + * + * Two allocation methods are supported for the tree nodes: + * + * - calloc/free: each node is allocated with one calloc. + * After a move, all nodes except the subtree rooted at + * the played move are freed one by one with free(). + * Since this can be very slow (seen 9s and loss on time because + * of this) the nodes are freed in a background thread. + * We still reserve enough memory for the next move in case + * the background thread doesn't free nodes fast enough. + * + * - fast_alloc: a large buffer is allocated once, and each + * node allocation takes some of this buffer. After a move + * is played, no memory if freed if the buffer still has + * enough free space. Otherwise the subtree rooted at the + * played move is copied to a temporary buffer, pruning it + * if necessary to fit in this small buffer. We copy by + * preference nodes with largest number of playouts. + * Then the temporary buffer is copied back to the original + * buffer, which has now plenty of space. + * Once the fast_alloc mode is proven reliable, the + * calloc/free method will be removed. */ + #include #include #include "move.h" @@ -70,9 +94,12 @@ struct tree { // Statistics int max_depth; volatile unsigned long nodes_size; // byte size of all allocated nodes + unsigned long max_tree_size; // maximum byte size for entire tree, > 0 only for fast_alloc + void *nodes; // nodes buffer, only for fast_alloc }; -struct tree *tree_init(struct board *board, enum stone color); +/* Warning: all functions below except tree_expand_node & tree_leaf_node are THREAD-UNSAFE! */ +struct tree *tree_init(struct board *board, enum stone color, unsigned long max_tree_size); void tree_done(struct tree *tree); void tree_dump(struct tree *tree, int thres); void tree_save(struct tree *tree, struct board *b, int thres); @@ -81,9 +108,8 @@ struct tree *tree_copy(struct tree *tree); void tree_merge(struct tree *dest, struct tree *src); void tree_normalize(struct tree *tree, int factor); -/* Warning: All these functions are THREAD-UNSAFE! */ void tree_expand_node(struct tree *tree, struct tree_node *node, struct board *b, enum stone color, struct uct *u, int parity); -void tree_promote_node(struct tree *tree, struct tree_node *node); +void tree_promote_node(struct tree *tree, struct tree_node **node); bool tree_promote_at(struct tree *tree, struct board *b, coord_t c); static bool tree_leaf_node(struct tree_node *node); diff --git a/uct/uct.c b/uct/uct.c index 227c04b..10c6b14 100644 --- a/uct/uct.c +++ b/uct/uct.c @@ -58,7 +58,7 @@ static void uct_pondering_stop(struct uct *u); static void setup_state(struct uct *u, struct board *b, enum stone color) { - u->t = tree_init(b, color); + u->t = tree_init(b, color, u->fast_alloc ? u->max_tree_size: 0); if (u->force_seed) fast_srandom(u->force_seed); if (UDEBUGL(0)) @@ -701,7 +701,7 @@ uct_genmove(struct engine *e, struct board *b, struct time_info *ti, enum stone } } - tree_promote_node(u->t, best); + tree_promote_node(u->t, &best); /* After a pass, pondering is harmful for two reasons: * (i) We might keep pondering even when the game is over. * Of course this is the case for opponent resign as well. @@ -741,7 +741,8 @@ uct_genbook(struct engine *e, struct board *b, struct time_info *ti, enum stone void uct_dumpbook(struct engine *e, struct board *b, enum stone color) { - struct tree *t = tree_init(b, color); + struct uct *u = e->data; + struct tree *t = tree_init(b, color, u->fast_alloc ? u->max_tree_size: 0); tree_load(t, b); tree_dump(t, 0); tree_done(t); @@ -988,6 +989,11 @@ uct_state_init(char *arg, struct board *b) exit(1); } + if (u->fast_alloc && !u->parallel_tree) { + fprintf(stderr, "fast_alloc not supported with root parallelization.\n"); + exit(1); + } + if (!u->prior) u->prior = uct_prior_init(NULL, b); diff --git a/uct/walk.c b/uct/walk.c index 98c8bd5..0619008 100644 --- a/uct/walk.c +++ b/uct/walk.c @@ -93,9 +93,11 @@ uct_leaf_node(struct uct *u, struct board *b, enum stone player_color, /* If we don't anticipate well the opponent move during pondering * (the played move has few playouts) we still need more memory - * during genmove to explore the tree actually played. */ + * during genmove to explore the tree actually played. + * For fast_alloc, the tree compaction will free enough memory + * immediately. */ unsigned long max_tree_size = u->max_tree_size; - if (u->pondering) + if (u->pondering && !u->fast_alloc) max_tree_size = (max_tree_size * (100 - MIN_FREE_MEM_PERCENT)) / 100; /* We need to make sure only one thread expands the node. If -- 2.11.4.GIT