From 6832f4e727eacc797d1fb991794f89383226c6a3 Mon Sep 17 00:00:00 2001 From: Petr Baudis Date: Wed, 20 Jan 2010 01:41:01 +0100 Subject: [PATCH] Replace UCT/montecarlo games=N parameter with -t =N main zzgo parameter. The genmove engine call now takes (struct time_info *) with time settings; currently they are just passed through GTP verbatim from -t (if unset, the engine fills in defaults). In the future, GTP will change them according to time_left commands. Currently, the engines support just TT_MOVE, TD_GAMES. --- engine.h | 3 ++- gtp.c | 6 +++--- gtp.h | 3 ++- montecarlo/internal.h | 2 +- montecarlo/montecarlo.c | 25 ++++++++++++++++++------- patternscan/patternscan.c | 2 +- random/random.c | 2 +- replay/replay.c | 2 +- timeinfo.h | 1 + uct/uct.c | 45 ++++++++++++++++++++++++++++++++------------- uct/uct.h | 3 ++- zzgo.c | 14 +++++++++++--- 12 files changed, 75 insertions(+), 33 deletions(-) diff --git a/engine.h b/engine.h index 8642716..42e0976 100644 --- a/engine.h +++ b/engine.h @@ -10,13 +10,14 @@ extern bool engine_reset; struct engine; struct move_queue; +struct time_info; typedef char *(*engine_notify_play)(struct engine *e, struct board *b, struct move *m); typedef char *(*engine_chat)(struct engine *e, struct board *b, char *cmd); /* Generate a move. If pass_all_alive is true, shall be generated only * if all stones on the board can be considered alive, without regard to "dead" * considered stones. */ -typedef coord_t *(*engine_genmove)(struct engine *e, struct board *b, enum stone color, bool pass_all_alive); +typedef coord_t *(*engine_genmove)(struct engine *e, struct board *b, struct time_info *ti, enum stone color, bool pass_all_alive); /* One dead group per queued move (coord_t is (ab)used as group_t). */ typedef void (*engine_dead_group_list)(struct engine *e, struct board *b, struct move_queue *mq); /* e->data and e will be free()d by caller afterwards. 
*/ diff --git a/gtp.c b/gtp.c index 1f34d82..9daa254 100644 --- a/gtp.c +++ b/gtp.c @@ -65,7 +65,7 @@ gtp_error(int id, ...) * Even basic input checking is missing. */ void -gtp_parse(struct board *board, struct engine *engine, char *buf) +gtp_parse(struct board *board, struct engine *engine, struct time_info *ti, char *buf) { #define next_tok(to_) \ to_ = next; \ @@ -163,7 +163,7 @@ gtp_parse(struct board *board, struct engine *engine, char *buf) char *arg; next_tok(arg); enum stone color = str2stone(arg); - coord_t *c = engine->genmove(engine, board, color, !strcasecmp(cmd, "kgs-genmove_cleanup")); + coord_t *c = engine->genmove(engine, board, ti, color, !strcasecmp(cmd, "kgs-genmove_cleanup")); struct move m = { *c, color }; board_play(board, &m); char *str = coord2str(*c, board); @@ -285,7 +285,7 @@ next_group:; char *arg; next_tok(arg); enum stone color = str2stone(arg); - if (uct_genbook(engine, board, color)) + if (uct_genbook(engine, board, ti, color)) gtp_reply(id, NULL); else gtp_error(id, "error generating book", NULL); diff --git a/gtp.h b/gtp.h index b937b05..0f6e698 100644 --- a/gtp.h +++ b/gtp.h @@ -3,7 +3,8 @@ struct board; struct engine; +struct time_info; -void gtp_parse(struct board *b, struct engine *e, char *buf); +void gtp_parse(struct board *b, struct engine *e, struct time_info *ti, char *buf); #endif diff --git a/montecarlo/internal.h b/montecarlo/internal.h index f45816a..c8eec76 100644 --- a/montecarlo/internal.h +++ b/montecarlo/internal.h @@ -12,7 +12,7 @@ struct playout_policy; /* Internal engine state. 
*/ struct montecarlo { int debug_level; - int games, gamelen; + int gamelen; float resign_ratio; int loss_threshold; struct playout_policy *playout; diff --git a/montecarlo/montecarlo.c b/montecarlo/montecarlo.c index 123f167..3be6d97 100644 --- a/montecarlo/montecarlo.c +++ b/montecarlo/montecarlo.c @@ -12,6 +12,7 @@ #include "montecarlo/internal.h" #include "montecarlo/montecarlo.h" #include "playout.h" +#include "timeinfo.h" /* This is simple monte-carlo engine. It plays MC_GAMES random games from the @@ -74,10 +75,23 @@ board_stats_print(struct board *board, struct move_stat *moves, FILE *f) static coord_t * -montecarlo_genmove(struct engine *e, struct board *b, enum stone color, bool pass_all_alive) +montecarlo_genmove(struct engine *e, struct board *b, struct time_info *ti, enum stone color, bool pass_all_alive) { struct montecarlo *mc = e->data; + if (ti->period == TT_TOTAL) { + fprintf(stderr, "Warning: TT_TOTAL time mode not supported, resetting to defaults.\n"); + ti->period = TT_NULL; + } else if (ti->dim == TD_WALLTIME) { + fprintf(stderr, "Warning: TD_WALLTIME time mode not supported, resetting to defaults.\n"); + ti->period = TT_NULL; + } + if (ti->period == TT_NULL) { + ti->period = TT_MOVE; + ti->dim = TD_GAMES; + ti->len.games = MC_GAMES; + } + /* resign when the hope for win vanishes */ coord_t top_coord = resign; float top_ratio = mc->resign_ratio; @@ -89,7 +103,7 @@ montecarlo_genmove(struct engine *e, struct board *b, enum stone color, bool pas int losses = 0; int i, superko = 0, good_games = 0; - for (i = 0; i < mc->games; i++) { + for (i = 0; i < ti->len.games; i++) { assert(!b->superko_violation); struct board b2; @@ -119,7 +133,7 @@ montecarlo_genmove(struct engine *e, struct board *b, enum stone color, bool pas if (result == 0) { /* Superko. We just ignore this playout. * And play again. */ - if (unlikely(superko > 2 * mc->games)) { + if (unlikely(superko > 2 * ti->len.games)) { /* Uhh. Triple ko, or something? 
*/ if (MCDEBUGL(0)) fprintf(stderr, "SUPERKO LOOP. I will pass. Did we hit triple ko?\n"); @@ -198,7 +212,6 @@ montecarlo_state_init(char *arg) struct montecarlo *mc = calloc(1, sizeof(struct montecarlo)); mc->debug_level = 1; - mc->games = MC_GAMES; mc->gamelen = MC_GAMELEN; if (arg) { @@ -217,8 +230,6 @@ montecarlo_state_init(char *arg) mc->debug_level = atoi(optval); else mc->debug_level++; - } else if (!strcasecmp(optname, "games") && optval) { - mc->games = atoi(optval); } else if (!strcasecmp(optname, "gamelen") && optval) { mc->gamelen = atoi(optval); } else if (!strcasecmp(optname, "playout") && optval) { @@ -245,7 +256,7 @@ montecarlo_state_init(char *arg) mc->playout->debug_level = mc->debug_level; mc->resign_ratio = 0.1; /* Resign when most games are lost. */ - mc->loss_threshold = mc->games / 10; /* Stop reading if no loss encountered in first n games. */ + mc->loss_threshold = 5000; /* Stop reading if no loss encountered in first 5000 games. */ return mc; } diff --git a/patternscan/patternscan.c b/patternscan/patternscan.c index 291ba9b..69d4272 100644 --- a/patternscan/patternscan.c +++ b/patternscan/patternscan.c @@ -184,7 +184,7 @@ patternscan_play(struct engine *e, struct board *b, struct move *m) } static coord_t * -patternscan_genmove(struct engine *e, struct board *b, enum stone color, bool pass_all_alive) +patternscan_genmove(struct engine *e, struct board *b, struct time_info *ti, enum stone color, bool pass_all_alive) { fprintf(stderr, "genmove command not available during patternscan!\n"); exit(EXIT_FAILURE); diff --git a/random/random.c b/random/random.c index ee478a6..029a96b 100644 --- a/random/random.c +++ b/random/random.c @@ -7,7 +7,7 @@ #include "random/random.h" static coord_t * -random_genmove(struct engine *e, struct board *b, enum stone color, bool pass_all_alive) +random_genmove(struct engine *e, struct board *b, struct time_info *ti, enum stone color, bool pass_all_alive) { coord_t coord; board_play_random(b, color, &coord, 
NULL, NULL); diff --git a/replay/replay.c b/replay/replay.c index e6a8748..d79b0d2 100644 --- a/replay/replay.c +++ b/replay/replay.c @@ -20,7 +20,7 @@ struct replay { static coord_t * -replay_genmove(struct engine *e, struct board *b, enum stone color, bool pass_all_alive) +replay_genmove(struct engine *e, struct board *b, struct time_info *ti, enum stone color, bool pass_all_alive) { struct replay *r = e->data; diff --git a/timeinfo.h b/timeinfo.h index 7569409..5633397 100644 --- a/timeinfo.h +++ b/timeinfo.h @@ -13,6 +13,7 @@ struct time_info { /* For how long we can spend the time? */ enum time_period { + TT_NULL, // No time limit. Other structure elements are undef. TT_MOVE, // Time for the next move. TT_TOTAL, // Time for the rest of the game. } period; diff --git a/uct/uct.c b/uct/uct.c index 66afa85..9382508 100644 --- a/uct/uct.c +++ b/uct/uct.c @@ -18,6 +18,7 @@ #include "playout/moggy.h" #include "playout/light.h" #include "random.h" +#include "timeinfo.h" #include "tactics.h" #include "uct/internal.h" #include "uct/prior.h" @@ -30,6 +31,8 @@ struct uct_policy *policy_ucb1amaf_init(struct uct *u, char *arg); static void uct_pondering_stop(struct uct *u); +/* Default number of simulations to perform per move. + * Note that this is now in total over all threads! (Unless TM_ROOT.) */ #define MC_GAMES 80000 #define MC_GAMELEN MAX_GAMELEN @@ -426,10 +429,10 @@ uct_search_stop(void) * done (incl. inherited simulations). If !dyngames, full number of simulations * is simulated in this search. */ static int -uct_search(struct uct *u, struct board *b, enum stone color, struct tree *t, bool dyngames) +uct_search(struct uct *u, struct board *b, struct time_info *ti, enum stone color, struct tree *t, bool dyngames) { /* Required games limit as to be seen in the tree root u.playouts. 
*/ - int games = u->games; + int games = ti->len.games; if (u->t->root->u.playouts > 0) { if (dyngames) { if (UDEBUGL(2)) @@ -531,8 +534,25 @@ uct_pondering_stop(struct uct *u) } +void +time_prep(struct time_info *ti) +{ + if (ti->period == TT_TOTAL) { + fprintf(stderr, "Warning: TT_TOTAL time mode not supported, resetting to defaults.\n"); + ti->period = TT_NULL; + } else if (ti->dim == TD_WALLTIME) { + fprintf(stderr, "Warning: TD_WALLTIME time mode not supported, resetting to defaults.\n"); + ti->period = TT_NULL; + } + if (ti->period == TT_NULL) { + ti->period = TT_MOVE; + ti->dim = TD_GAMES; + ti->len.games = MC_GAMES; + } +} + static coord_t * -uct_genmove(struct engine *e, struct board *b, enum stone color, bool pass_all_alive) +uct_genmove(struct engine *e, struct board *b, struct time_info *ti, enum stone color, bool pass_all_alive) { struct uct *u = e->data; @@ -544,13 +564,15 @@ uct_genmove(struct engine *e, struct board *b, enum stone color, bool pass_all_a b->superko_violation = false; } + time_prep(ti); + /* Seed the tree. */ uct_pondering_stop(u); prepare_move(e, b, color); assert(u->t); /* Perform the Monte Carlo Tree Search! */ - int played_games = uct_search(u, b, color, u->t, true); + int played_games = uct_search(u, b, ti, color, u->t, true); /* Choose the best move from the tree. 
*/ struct tree_node *best = u->policy->choose(u->policy, u->t->root, b, color); @@ -595,15 +617,18 @@ uct_genmove(struct engine *e, struct board *b, enum stone color, bool pass_all_a bool -uct_genbook(struct engine *e, struct board *b, enum stone color) +uct_genbook(struct engine *e, struct board *b, struct time_info *ti, enum stone color) { struct uct *u = e->data; + time_prep(ti); + if (!u->t) prepare_move(e, b, color); assert(u->t); - uct_search(u, b, color, u->t, false); + uct_search(u, b, ti, color, u->t, false); - tree_save(u->t, b, u->games / 100); + assert(ti->dim == TD_GAMES); + tree_save(u->t, b, ti->len.games / 100); return true; } @@ -624,7 +649,6 @@ uct_state_init(char *arg, struct board *b) struct uct *u = calloc(1, sizeof(struct uct)); u->debug_level = 1; - u->games = MC_GAMES; u->gamelen = MC_GAMELEN; u->mercymin = 0; u->expand_p = 2; @@ -661,11 +685,6 @@ uct_state_init(char *arg, struct board *b) u->debug_level = atoi(optval); else u->debug_level++; - } else if (!strcasecmp(optname, "games") && optval) { - /* Number of simulations to perform per move. - * Note that this is now in total over all - * threads! (Unless TM_ROOT.) */ - u->games = atoi(optval); } else if (!strcasecmp(optname, "mercy") && optval) { /* Minimal difference of black/white captures * to stop playout - "Mercy Rule". 
Speeds up diff --git a/uct/uct.h b/uct/uct.h index 603f7f7..bf2e9c9 100644 --- a/uct/uct.h +++ b/uct/uct.h @@ -7,7 +7,8 @@ struct engine *engine_uct_init(char *arg, struct board *b); struct board; -bool uct_genbook(struct engine *e, struct board *b, enum stone color); +struct time_info; +bool uct_genbook(struct engine *e, struct board *b, struct time_info *ti, enum stone color); void uct_dumpbook(struct engine *e, struct board *b, enum stone color); #endif diff --git a/zzgo.c b/zzgo.c index 4297345..2a599f4 100644 --- a/zzgo.c +++ b/zzgo.c @@ -17,6 +17,7 @@ #include "t-unit/test.h" #include "uct/uct.h" #include "gtp.h" +#include "timeinfo.h" #include "random.h" #include "version.h" @@ -63,12 +64,13 @@ bool engine_reset = false; int main(int argc, char *argv[]) { enum engine_id engine = E_UCT; + struct time_info ti = { .period = TT_NULL }; char *testfile = NULL; seed = time(NULL) ^ getpid(); int opt; - while ((opt = getopt(argc, argv, "e:d:s:u:")) != -1) { + while ((opt = getopt(argc, argv, "e:d:s:t:u:")) != -1) { switch (opt) { case 'e': if (!strcasecmp(optarg, "random")) { @@ -92,12 +94,18 @@ int main(int argc, char *argv[]) case 's': seed = atoi(optarg); break; + case 't': + if (!time_parse(&ti, optarg)) { + fprintf(stderr, "%s: Invalid -t argument %s\n", argv[0], optarg); + exit(1); + } + break; case 'u': testfile = strdup(optarg); break; default: /* '?' 
*/ fprintf(stderr, "Pachi version %s\n", PACHI_VERSION); - fprintf(stderr, "Usage: %s [-e random|replay|patternscan|montecarlo|uct] [-d DEBUG_LEVEL] [-s RANDOM_SEED] [-u TEST_FILENAME] [ENGINE_ARGS]\n", + fprintf(stderr, "Usage: %s [-e random|replay|patternscan|montecarlo|uct] [-d DEBUG_LEVEL] [-s RANDOM_SEED] [-t TIME_SETTINGS] [-u TEST_FILENAME] [ENGINE_ARGS]\n", argv[0]); exit(1); } @@ -122,7 +130,7 @@ int main(int argc, char *argv[]) while (fgets(buf, 4096, stdin)) { if (DEBUGL(1)) fprintf(stderr, "IN: %s", buf); - gtp_parse(b, e, buf); + gtp_parse(b, e, &ti, buf); if (engine_reset) { if (!e->keep_on_clear) { b->es = NULL; -- 2.11.4.GIT