From 7f8fa5b443da1c9a2230ba7da7941bb627e8d1d4 Mon Sep 17 00:00:00 2001 From: Jean-loup Gailly Date: Sat, 8 May 2010 16:52:08 +0200 Subject: [PATCH] Distributed slave: update tree stats from increments sent by master --- distributed/distributed.h | 50 ++++++++++++++++ uct/internal.h | 10 +--- uct/slave.c | 150 ++++++++++++++++++++++++++++++++++++++++++++-- uct/slave.h | 3 + uct/tree.c | 10 +++- uct/tree.h | 9 ++- uct/uct.c | 22 +++++-- 7 files changed, 232 insertions(+), 22 deletions(-) diff --git a/distributed/distributed.h b/distributed/distributed.h index d436306..d672798 100644 --- a/distributed/distributed.h +++ b/distributed/distributed.h @@ -24,6 +24,39 @@ typedef int64_t path_t; #define append_child(path, c, board) (((path) << board_bits2(board)) | (c)) +/* For debugging only */ +struct hash_counts { + long lookups; + long collisions; + long inserts; + long occupied; +}; + +/* Find a hash table entry given its coord path from root. + * Set found to false if the entry is empty. + * Abort if the table gets too full (should never happen). + * We use double hashing and coord_path = 0 for unused entries. */ +#define find_hash(hash, table, hash_bits, path, found, counts) \ + do { \ + if (DEBUG_MODE) counts.lookups++; \ + int mask = hash_mask(hash_bits); \ + int delta = (int)((path) >> (hash_bits)) | 1; \ + hash = ((int)(path) ^ delta ^ (delta >> (hash_bits))) & mask; \ + path_t cp = (table)[hash].coord_path; \ + found = (cp == path); \ + if (found | !cp) break; \ + int tries = 1 << ((hash_bits)-2); \ + do { \ + if (DEBUG_MODE) counts.collisions++; \ + hash = (hash + delta) & mask; \ + cp = (table)[hash].coord_path; \ + found = (cp == path); \ + if (found | !cp) break; \ + } while (--tries); \ + assert(tries); \ + } while (0) + + /* Stats exchanged between master and slave. They are always * incremental values to be added to what was last sent. */ struct incr_stats { @@ -31,6 +64,23 @@ struct incr_stats { struct move_stats incr; }; +/* A slave machine updates at most 7 (19x19) or 9 (9x9) nodes for each + * update of the root node. If we have at most 20 threads at 1500 + * games/s each, a slave machine can do at most 30K games/s. */ + +/* At 30K games/s a slave can output 270K nodes/s or 4.2 MB/s. The master + * with a 100 MB/s network can thus support at most 24 slaves. */ +#define DEFAULT_MAX_SLAVES 24 + +/* In a 30s move at 270K nodes/s a slave can send and receive at most + * 8.1M nodes so at worst 23 bits are needed for the hash table in the + * slave and for the per-slave hash table in the master. However the + * same nodes are often sent so in practice 21 bits are sufficient. + * Larger hash tables are not desirable because it would take too much + * time to clear them at each move in the master. */ +#define DEFAULT_STATS_HBITS 21 + + #define DIST_GAMELEN 1000 #define force_reply(id) ((id) + DIST_GAMELEN) diff --git a/uct/internal.h b/uct/internal.h index 4a037bd..922d353 100644 --- a/uct/internal.h +++ b/uct/internal.h @@ -4,7 +4,6 @@ /* Internal UCT structures */ #include "debug.h" -#include "distributed/distributed.h" #include "move.h" #include "ownermap.h" #include "playout.h" @@ -22,13 +21,6 @@ struct uct_dynkomi; /* How many games to consider at minimum before judging groups. */ #define GJ_MINGAMES 500 -/* Distributed stats for each child of the root node. */ -struct node_stats { - struct move_stats2 last_sent_own; - struct move_stats2 added_from_others; - struct tree_node *node; -}; - /* Internal engine state. */ struct uct { int debug_level; @@ -92,7 +84,7 @@ struct uct { /* Used within frame of single genmove. */ struct board_ownermap ownermap; /* Used for coordination among slaves of the distributed engine. */ - struct node_stats *stats; + int stats_hbits; int played_own; int played_all; /* games played by all slaves */ diff --git a/uct/slave.c b/uct/slave.c index 2a26c95..9e79ab0 100644 --- a/uct/slave.c +++ b/uct/slave.c @@ -1,3 +1,31 @@ +/* This is the slave specific part of the distributed engine. + * See introduction at top of distributed/distributed.c. + * The slave maintains a hash table of nodes received from the + * master. When receiving stats the hash table gives a pointer to the + * tree node to update. When sending stats we remember in the tree + * what was previously sent so that only the incremental part has to + * be sent. The incremental part is smaller and can be compressed. + * The compression is not yet done in this version. */ + +/* Similarly the master only sends stats increments. + * They include only contributions from other slaves. */ + +/* The keys for the hash table are coordinate paths from + * a root child to a given node. See distributed/distributed.h + * for the encoding of a path to a 64 bit integer. */ + +/* To allow the master to select the best move, slaves also send + * absolute playout counts for the best top level nodes (children + * of the root node), including contributions from other slaves. */ + +/* Pass me arguments like a=b,c=d,... + * Slave specific arguments (see uct.c for the other uct arguments + * and distributed.c for the port arguments) : + * slave required to indicate slave mode + * max_nodes=MAX_NODES default 80K + * stats_hbits=STATS_HBITS default 24. 2^stats_bits = hash table size + */ + #include #include #include @@ -12,7 +40,6 @@ #include "gtp.h" #include "move.h" #include "timeinfo.h" -#include "distributed/distributed.h" #include "uct/internal.h" #include "uct/search.h" #include "uct/slave.h" @@ -21,6 +48,109 @@ /* UCT infrastructure for a distributed engine slave. */ +/* For debugging only. */ +static struct hash_counts h_counts; +static long parent_not_found = 0; +static long parent_leaf = 0; +static long node_not_found = 0; + +/* Hash table entry mapping path to node. */ +struct tree_hash { + path_t coord_path; + struct tree_node *node; +}; + +void * +uct_htable_alloc(int hbits) +{ + return calloc2(1 << hbits, sizeof(struct tree_hash)); +} + +/* Clear the hash table. Used only when running as slave for the distributed engine. */ +void uct_htable_reset(struct tree *t) +{ + if (!t->htable) return; + double start = time_now(); + memset(t->htable, 0, (1 << t->hbits) * sizeof(t->htable[0])); + if (DEBUGL(3)) + fprintf(stderr, "tree occupied %ld %.1f%% inserts %ld collisions %ld/%ld %.1f%% clear %.3fms\n" + "parent_not_found %.1f%% parent_leaf %.1f%% node_not_found %.1f%%\n", + h_counts.occupied, h_counts.occupied * 100.0 / (1 << t->hbits), + h_counts.inserts, h_counts.collisions, h_counts.lookups, + h_counts.collisions * 100.0 / (h_counts.lookups + 1), + (time_now() - start)*1000, + parent_not_found * 100.0 / (h_counts.lookups + 1), + parent_leaf * 100.0 / (h_counts.lookups + 1), + node_not_found * 100.0 / (h_counts.lookups + 1)); + if (DEBUG_MODE) h_counts.occupied = 0; +} + +/* Find a node given its coord path from root. Insert it in the + * hash table if it is not already there. + * Return the tree node, or NULL if the node cannot be found. + * The tree is modified in background while this function is running. + * prev is only used to optimize the tree search, given that calls to + * tree_find_node are made with sorted coordinates (increasing levels + * and increasing coord within a level). */ +static struct tree_node * +tree_find_node(struct tree *t, struct incr_stats *is, struct tree_node *prev) +{ + assert(t && t->htable); + path_t path = is->coord_path; + /* pass and resign must never be inserted in the hash table. */ + assert(path > 0); + + int hash, parent_hash; + bool found; + find_hash(hash, t->htable, t->hbits, path, found, h_counts); + struct tree_hash *hnode = &t->htable[hash]; + + if (DEBUGVV(7)) + fprintf(stderr, + "find_node %"PRIpath" %s found %d hash %d playouts %d node %p\n", path, + path2sstr(path, t->board), found, hash, is->incr.playouts, hnode->node); + + if (found) return hnode->node; + + /* The master sends parents before children so the parent should + * already be in the hash table. */ + path_t parent_p = parent_path(path, t->board); + struct tree_node *parent; + if (parent_p) { + find_hash(parent_hash, t->htable, t->hbits, + parent_p, found, h_counts); + parent = t->htable[parent_hash].node; + } else { + parent = t->root; + } + struct tree_node *node = NULL; + if (parent) { + /* Search for the node in parent's children. */ + coord_t leaf = leaf_coord(path, t->board); + node = (prev && prev->parent == parent ? prev->sibling : parent->children); + while (node && node->coord != leaf) node = node->sibling; + + if (DEBUG_MODE) parent_leaf += !parent->is_expanded; + } else { + if (DEBUG_MODE) parent_not_found++; + if (DEBUGVV(7)) + fprintf(stderr, "parent of %"PRIpath" %s not found\n", + path, path2sstr(path, t->board)); + } + + /* Insert the node in the hash table. */ + hnode->node = node; + if (DEBUG_MODE) h_counts.inserts++, h_counts.occupied++; + if (DEBUGVV(7)) + fprintf(stderr, "insert path %"PRIpath" %s hash %d playouts %d node %p\n", + path, path2sstr(path, t->board), hash, is->incr.playouts, node); + + if (DEBUG_MODE && !node) node_not_found++; + + hnode->coord_path = path; + return node; +} + enum parse_code uct_notify(struct engine *e, struct board *b, int id, char *cmd, char *args, char **reply) { @@ -66,8 +196,11 @@ receive_stats(struct uct *u, int size) { if (size % sizeof(struct incr_stats)) return false; int nodes = size / sizeof(struct incr_stats); + if (nodes > (1 << u->stats_hbits)) return false; - assert(nodes); + struct tree *t = u->t; + assert(nodes && t->htable); + struct tree_node *prev = NULL; double start_time = time_now(); for (int n = 0; n < nodes; n++) { @@ -78,9 +211,18 @@ receive_stats(struct uct *u, int size) if (UDEBUGL(7)) fprintf(stderr, "read %5d/%d %6d %.3f %"PRIpath" %s\n", n, nodes, is.incr.playouts, is.incr.value, is.coord_path, - path2sstr(is.coord_path, u->t->board)); + path2sstr(is.coord_path, t->board)); + + struct tree_node *node = tree_find_node(t, &is, prev); + if (!node) continue; + + /* node_total += others_incr */ + stats_add_result(&node->u, is.incr.value, is.incr.playouts); + + /* last_total += others_incr */ + stats_add_result(&node->pu, is.incr.value, is.incr.playouts); - /* TODO: update the stats in the tree. */ + prev = node; } if (DEBUGVV(2)) fprintf(stderr, "read args for %d nodes in %.4fms\n", nodes, diff --git a/uct/slave.h b/uct/slave.h index 2c5a697..90280e9 100644 --- a/uct/slave.h +++ b/uct/slave.h @@ -2,6 +2,7 @@ #define ZZGO_UCT_SLAVE_H #include "move.h" +#include "distributed/distributed.h" struct board; struct engine; @@ -9,5 +10,7 @@ struct time_info; enum parse_code uct_notify(struct engine *e, struct board *b, int id, char *cmd, char *args, char **reply); char *uct_genmoves(struct engine *e, struct board *b, struct time_info *ti, enum stone color, char *args, bool pass_all_alive); +void *uct_htable_alloc(int hbits); +void uct_htable_reset(struct tree *t); #endif diff --git a/uct/tree.c b/uct/tree.c index f6270c3..37df37f 100644 --- a/uct/tree.c +++ b/uct/tree.c @@ -17,6 +17,7 @@ #include "uct/internal.h" #include "uct/prior.h" #include "uct/tree.h" +#include "uct/slave.h" /* Allocate one node in the fast_alloc mode. The returned node @@ -63,7 +64,7 @@ tree_init_node(struct tree *t, coord_t coord, int depth, bool fast_alloc) /* Create a tree structure. Pre-allocate all nodes if max_tree_size is > 0. */ struct tree * -tree_init(struct board *board, enum stone color, unsigned long max_tree_size, float ltree_aging) +tree_init(struct board *board, enum stone color, unsigned long max_tree_size, float ltree_aging, int hbits) { struct tree *t = calloc2(1, sizeof(*t)); t->board = board; @@ -83,6 +84,9 @@ tree_init(struct board *board, enum stone color, unsigned long max_tree_size, fl t->ltree_black = tree_init_node(t, pass, 0, false); t->ltree_white = tree_init_node(t, pass, 0, false); t->ltree_aging = ltree_aging; + + t->hbits = hbits; + if (hbits) t->htable = uct_htable_alloc(hbits); return t; } @@ -154,6 +158,8 @@ tree_done(struct tree *t) { tree_done_node(t, t->ltree_black); tree_done_node(t, t->ltree_white); + + if (t->htable) free(t->htable); if (t->nodes) { free(t->nodes); free(t); @@ -430,7 +436,7 @@ tree_garbage_collect(struct tree *tree, unsigned long max_size, struct tree_node assert(tree->nodes && !node->parent && !node->sibling); double start_time = time_now(); - struct tree *temp_tree = tree_init(tree->board, tree->root_color, max_size, tree->ltree_aging); + struct tree *temp_tree = tree_init(tree->board, tree->root_color, max_size, tree->ltree_aging, 0); temp_tree->nodes_size = 0; // We do not want the dummy pass node struct tree_node *temp_node; diff --git a/uct/tree.h b/uct/tree.h index 73b737b..a542840 100644 --- a/uct/tree.h +++ b/uct/tree.h @@ -80,6 +80,8 @@ struct tree_node { bool is_expanded; }; +struct tree_hash; + struct tree { struct board *board; struct tree_node *root; @@ -110,6 +112,11 @@ struct tree { // 1 means don't age at all. float ltree_aging; + /* Hash table used when working as slave for the distributed engine. + * Maps coordinate path to tree node. */ + struct tree_hash *htable; + int hbits; + // Statistics int max_depth; volatile unsigned long nodes_size; // byte size of all allocated nodes @@ -118,7 +125,7 @@ struct tree { }; /* Warning: all functions below except tree_expand_node & tree_leaf_node are THREAD-UNSAFE! */ -struct tree *tree_init(struct board *board, enum stone color, unsigned long max_tree_size, float ltree_aging); +struct tree *tree_init(struct board *board, enum stone color, unsigned long max_tree_size, float ltree_aging, int hbits); void tree_done(struct tree *tree); void tree_dump(struct tree *tree, int thres); void tree_save(struct tree *tree, struct board *b, int thres); diff --git a/uct/uct.c b/uct/uct.c index 3cf637a..1743256 100644 --- a/uct/uct.c +++ b/uct/uct.c @@ -18,7 +18,6 @@ #include "playout/light.h" #include "tactics.h" #include "timeinfo.h" -#include "distributed/distributed.h" #include "uct/dynkomi.h" #include "uct/internal.h" #include "uct/prior.h" @@ -39,7 +38,8 @@ static void uct_pondering_start(struct uct *u, struct board *b0, struct tree *t, static void setup_state(struct uct *u, struct board *b, enum stone color) { - u->t = tree_init(b, color, u->fast_alloc ? u->max_tree_size : 0, u->local_tree_aging); + u->t = tree_init(b, color, u->fast_alloc ? u->max_tree_size : 0, + u->local_tree_aging, u->stats_hbits); if (u->force_seed) fast_srandom(u->force_seed); if (UDEBUGL(0)) @@ -77,6 +77,7 @@ uct_prepare_move(struct uct *u, struct board *b, enum stone color) color, u->t->root_color); exit(1); } + uct_htable_reset(u->t); } else { /* We need fresh state. */ @@ -86,7 +87,6 @@ uct_prepare_move(struct uct *u, struct board *b, enum stone color) u->ownermap.playouts = 0; memset(u->ownermap.map, 0, board_size2(b) * sizeof(u->ownermap.map[0])); - memset(u->stats, 0, board_size2(b) * sizeof(u->stats[0])); u->played_own = u->played_all = 0; } @@ -248,7 +248,6 @@ uct_done(struct engine *e) uct_pondering_stop(u); if (u->t) reset_state(u); free(u->ownermap.map); - free(u->stats); free(u->policy); free(u->random_policy); @@ -450,7 +449,7 @@ void uct_dumpbook(struct engine *e, struct board *b, enum stone color) { struct uct *u = e->data; - struct tree *t = tree_init(b, color, u->fast_alloc ? u->max_tree_size : 0, u->local_tree_aging); + struct tree *t = tree_init(b, color, u->fast_alloc ? u->max_tree_size : 0, u->local_tree_aging, 0); tree_load(t, b); tree_dump(t, 0); tree_done(t); @@ -753,6 +752,9 @@ uct_state_init(char *arg, struct board *b) } else if (!strcasecmp(optname, "slave")) { /* Act as slave for the distributed engine. */ u->slave = !optval || atoi(optval); + } else if (!strcasecmp(optname, "stats_hbits") && optval) { + /* Set hash table size to 2^stats_hbits for the shared stats. */ + u->stats_hbits = atoi(optval); } else if (!strcasecmp(optname, "banner") && optval) { /* Additional banner string. This must come as the * last engine parameter. */ @@ -787,6 +789,11 @@ uct_state_init(char *arg, struct board *b) fprintf(stderr, "fast_alloc not supported with root parallelization.\n"); exit(1); } + if (u->slave && !u->parallel_tree) { + /* node->pu used by slave. */ + fprintf(stderr, "slave not supported with root parallelization.\n"); + exit(1); + } if (u->fast_alloc) u->max_tree_size = (100ULL * u->max_tree_size) / (100 + MIN_FREE_MEM_PERCENT); @@ -798,7 +805,10 @@ uct_state_init(char *arg, struct board *b) u->playout->debug_level = u->debug_level; u->ownermap.map = malloc2(board_size2(b) * sizeof(u->ownermap.map[0])); - u->stats = malloc2(board_size2(b) * sizeof(u->stats[0])); + + if (u->slave) { + if (!u->stats_hbits) u->stats_hbits = DEFAULT_STATS_HBITS; + } if (!u->dynkomi) u->dynkomi = uct_dynkomi_init_linear(u, NULL, b); -- 2.11.4.GIT