uct/search.c

   1 #include <assert.h>
   2 #include <limits.h>
   3 #include <math.h>
   4 #include <pthread.h>
   5 #include <signal.h>
   6 #include <stdio.h>
   7 #include <stdlib.h>
   8 #include <string.h>
   9 #include <time.h>
  10
  11 #define DEBUG
  12
  13 #include "debug.h"
  14 #include "distributed/distributed.h"
  15 #include "move.h"
  16 #include "random.h"
  17 #include "timeinfo.h"
  18 #include "uct/dynkomi.h"
  19 #include "uct/internal.h"
  20 #include "uct/search.h"
  21 #include "uct/tree.h"
  22 #include "uct/uct.h"
  23 #include "uct/walk.h"
  24
  25
  26 /* Default time settings for the UCT engine. In distributed mode, slaves are
  27  * unlimited by default and all control is done on the master, either in time
  28  * or with total number of playouts over all slaves. (It is also possible but
  29  * not recommended to limit only the slaves; the master then decides the move
  30  * when a majority of slaves have made their choice.) */
  31 static struct time_info default_ti;
  32 static __attribute__((constructor)) void
  33 default_ti_init(void)
  34 {
  35         time_parse(&default_ti, "15");
  36 }
  37
  38 static const struct time_info unlimited_ti = {
  39         .period = TT_MOVE,
  40         .dim = TD_GAMES,
  41         .len = { .games = INT_MAX },
  42 };
  43
  44 /* When terminating UCT search early, the safety margin to add to the
  45  * remaining playout number estimate when deciding whether the result can
  46  * still change. */
  47 #define PLAYOUT_DELTA_SAFEMARGIN 1000
  48
  49 /* Minimal number of simulations to consider early break. */
  50 #define PLAYOUT_EARLY_BREAK_MIN 5000
  51
  52 /* Minimal time to consider early break (in seconds). */
  53 #define TIME_EARLY_BREAK_MIN 1.0
  54
  55
  56 /* Pachi threading structure:
  57  *
  58  * main thread
  59  *   |         main(), GTP communication, ...
  60  *   |         starts and stops the search managed by thread_manager
  61  *   |
  62  * thread_manager
  63  *   |         spawns and collects worker threads
  64  *   |
  65  * worker0
  66  * worker1
  67  * ...
  68  * workerK
  69  *             uct_playouts() loop, doing descend-playout until uct_halt
  70  *
  71  * Another way to look at it is by functions (lines denote thread boundaries):
  72  *
  73  * | uct_genmove()
  74  * | uct_search()            (uct_search_start() .. uct_search_stop())
  75  * | -----------------------
  76  * | spawn_thread_manager()
  77  * | -----------------------
  78  * | spawn_worker()
  79  * V uct_playouts() */
  80
  81 /* Set in thread manager in case the workers should stop. */
  82 volatile sig_atomic_t uct_halt = 0;
  83 /* ID of the thread manager. */
  84 static pthread_t thread_manager;
  85 bool thread_manager_running;
  86
  87 static pthread_mutex_t finish_mutex = PTHREAD_MUTEX_INITIALIZER;
  88 static pthread_cond_t finish_cond = PTHREAD_COND_INITIALIZER;
  89 static volatile int finish_thread;
  90 static pthread_mutex_t finish_serializer = PTHREAD_MUTEX_INITIALIZER;
  91
  92 static void *
  93 spawn_worker(void *ctx_)
  94 {
  95         struct uct_thread_ctx *ctx = ctx_;
  96         /* Setup */
  97         fast_srandom(ctx->seed);
  98         /* Run */
  99         ctx->games = uct_playouts(ctx->u, ctx->b, ctx->color, ctx->t, ctx->ti);
 100         /* Finish */
 101         pthread_mutex_lock(&finish_serializer);
 102         pthread_mutex_lock(&finish_mutex);
 103         finish_thread = ctx->tid;
 104         pthread_cond_signal(&finish_cond);
 105         pthread_mutex_unlock(&finish_mutex);
 106         return ctx;
 107 }
 108
 109 /* Thread manager, controlling worker threads. It must be called with
 110  * finish_mutex lock held, but it will unlock it itself before exiting;
 111  * this is necessary to be completely deadlock-free. */
 112 /* The finish_cond can be signalled for it to stop; in that case,
 113  * the caller should set finish_thread = -1. */
 114 /* After it is started, it will update mctx->t to point at some tree
 115  * used for the actual search, on return
 116  * it will set mctx->games to the number of performed simulations. */
 117 static void *
 118 spawn_thread_manager(void *ctx_)
 119 {
 120         /* In thread_manager, we use only some of the ctx fields. */
 121         struct uct_thread_ctx *mctx = ctx_;
 122         struct uct *u = mctx->u;
 123         struct tree *t = mctx->t;
 124         fast_srandom(mctx->seed);
 125
 126         int played_games = 0;
 127         pthread_t threads[u->threads];
 128         int joined = 0;
 129
 130         uct_halt = 0;
 131
 132         /* Garbage collect the tree by preference when pondering. */
 133         if (u->pondering && t->nodes && t->nodes_size >= t->pruning_threshold) {
 134                 t->root = tree_garbage_collect(t, t->root);
 135         }
 136
 137         /* Spawn threads... */
 138         for (int ti = 0; ti < u->threads; ti++) {
 139                 struct uct_thread_ctx *ctx = malloc2(sizeof(*ctx));
 140                 ctx->u = u; ctx->b = mctx->b; ctx->color = mctx->color;
 141                 mctx->t = ctx->t = t;
 142                 ctx->tid = ti; ctx->seed = fast_random(65536) + ti;
 143                 ctx->ti = mctx->ti;
 144                 pthread_attr_t a;
 145                 pthread_attr_init(&a);
 146                 pthread_attr_setstacksize(&a, 1048576);
 147                 pthread_create(&threads[ti], &a, spawn_worker, ctx);
 148                 if (UDEBUGL(4))
 149                         fprintf(stderr, "Spawned worker %d\n", ti);
 150         }
 151
 152         /* ...and collect them back: */
 153         while (joined < u->threads) {
 154                 /* Wait for some thread to finish... */
 155                 pthread_cond_wait(&finish_cond, &finish_mutex);
 156                 if (finish_thread < 0) {
 157                         /* Stop-by-caller. Tell the workers to wrap up
 158                          * and unblock them from terminating. */
 159                         uct_halt = 1;
 160                         /* We need to make sure the workers do not complete
 161                          * the termination sequence before we get officially
 162                          * stopped - their wake and the stop wake could get
 163                          * coalesced. */
 164                         pthread_mutex_unlock(&finish_serializer);
 165                         continue;
 166                 }
 167                 /* ...and gather its remnants. */
 168                 struct uct_thread_ctx *ctx;
 169                 pthread_join(threads[finish_thread], (void **) &ctx);
 170                 played_games += ctx->games;
 171                 joined++;
 172                 free(ctx);
 173                 if (UDEBUGL(4))
 174                         fprintf(stderr, "Joined worker %d\n", finish_thread);
 175                 pthread_mutex_unlock(&finish_serializer);
 176         }
 177
 178         pthread_mutex_unlock(&finish_mutex);
 179
 180         mctx->games = played_games;
 181         return mctx;
 182 }
 183
 184
 185 /*** THREAD MANAGER end */
 186
 187 /*** Search infrastructure: */
 188
 189
 190 int
 191 uct_search_games(struct uct_search_state *s)
 192 {
 193         return s->ctx->t->root->u.playouts;
 194 }
 195
 196 void
 197 uct_search_start(struct uct *u, struct board *b, enum stone color,
 198                  struct tree *t, struct time_info *ti,
 199                  struct uct_search_state *s)
 200 {
 201         /* Set up search state. */
 202         s->base_playouts = s->last_dynkomi = s->last_print = t->root->u.playouts;
 203         s->print_interval = u->reportfreq * u->threads;
 204         s->fullmem = false;
 205
 206         if (ti) {
 207                 if (ti->period == TT_NULL) {
 208                         if (u->slave) {
 209                                 *ti = unlimited_ti;
 210                         } else {
 211                                 *ti = default_ti;
 212                                 time_start_timer(ti);
 213                         }
 214                 }
 215                 time_stop_conditions(ti, b, u->fuseki_end, u->yose_start, u->max_maintime_ratio, &s->stop);
 216         }
 217
 218         /* Fire up the tree search thread manager, which will in turn
 219          * spawn the searching threads. */
 220         assert(u->threads > 0);
 221         assert(!thread_manager_running);
 222         static struct uct_thread_ctx mctx;
 223         mctx = (struct uct_thread_ctx) { .u = u, .b = b, .color = color, .t = t, .seed = fast_random(65536), .ti = ti };
 224         s->ctx = &mctx;
 225         pthread_mutex_lock(&finish_serializer);
 226         pthread_mutex_lock(&finish_mutex);
 227         pthread_create(&thread_manager, NULL, spawn_thread_manager, s->ctx);
 228         thread_manager_running = true;
 229 }
 230
 231 struct uct_thread_ctx *
 232 uct_search_stop(void)
 233 {
 234         assert(thread_manager_running);
 235
 236         /* Signal thread manager to stop the workers. */
 237         pthread_mutex_lock(&finish_mutex);
 238         finish_thread = -1;
 239         pthread_cond_signal(&finish_cond);
 240         pthread_mutex_unlock(&finish_mutex);
 241
 242         /* Collect the thread manager. */
 243         struct uct_thread_ctx *pctx;
 244         thread_manager_running = false;
 245         pthread_join(thread_manager, (void **) &pctx);
 246         return pctx;
 247 }
 248
 249
 250 void
 251 uct_search_progress(struct uct *u, struct board *b, enum stone color,
 252                     struct tree *t, struct time_info *ti,
 253                     struct uct_search_state *s, int i)
 254 {
 255         struct uct_thread_ctx *ctx = s->ctx;
 256
 257         /* Adjust dynkomi? */
 258         int di = u->dynkomi_interval * u->threads;
 259         if (ctx->t->use_extra_komi && u->dynkomi->permove
 260             && !u->pondering && di
 261             && i > s->last_dynkomi + di) {
 262                 s->last_dynkomi += di;
 263                 floating_t old_dynkomi = ctx->t->extra_komi;
 264                 ctx->t->extra_komi = u->dynkomi->permove(u->dynkomi, b, ctx->t);
 265                 if (UDEBUGL(3) && old_dynkomi != ctx->t->extra_komi)
 266                         fprintf(stderr, "dynkomi adjusted (%f -> %f)\n",
 267                                 old_dynkomi, ctx->t->extra_komi);
 268         }
 269
 270         /* Print progress? */
 271         if (i - s->last_print > s->print_interval) {
 272                 s->last_print += s->print_interval; // keep the numbers tidy
 273                 uct_progress_status(u, ctx->t, color, s->last_print, NULL);
 274         }
 275
 276         if (!s->fullmem && ctx->t->nodes_size > u->max_tree_size) {
 277                 if (UDEBUGL(2))
 278                         fprintf(stderr, "memory limit hit (%lu > %lu)\n",
 279                                 ctx->t->nodes_size, u->max_tree_size);
 280                 s->fullmem = true;
 281         }
 282 }
 283
 284
 285 /* Determine whether we should terminate the search early. */
 286 static bool
 287 uct_search_stop_early(struct uct *u, struct tree *t, struct board *b,
 288                 struct time_info *ti, struct time_stop *stop,
 289                 struct tree_node *best, struct tree_node *best2,
 290                 int played, bool fullmem)
 291 {
 292         /* If the memory is full, stop immediately. Since the tree
 293          * cannot grow anymore, some non-well-expanded nodes will
 294          * quickly take over with extremely high ratio since the
 295          * counters are not properly simulated (just as if we use
 296          * non-UCT MonteCarlo). */
 297         /* (XXX: A proper solution would be to prune the tree
 298          * on the spot.) */
 299         if (fullmem)
 300                 return true;
 301
 302         /* Think at least 100ms to avoid a random move. This is particularly
 303          * important in distributed mode, where this function is called frequently. */
 304         double elapsed = 0.0;
 305         if (ti->dim == TD_WALLTIME) {
 306                 elapsed = time_now() - ti->len.t.timer_start;
 307                 if (elapsed < TREE_BUSYWAIT_INTERVAL) return false;
 308         }
 309
 310         /* Break early if we estimate the second-best move cannot
 311          * catch up in assigned time anymore. We use all our time
 312          * if we are in byoyomi with single stone remaining in our
 313          * period, however - it's better to pre-ponder. */
 314         bool time_indulgent = (!ti->len.t.main_time && ti->len.t.byoyomi_stones == 1);
 315         if (best2 && ti->dim == TD_WALLTIME
 316             && played >= PLAYOUT_EARLY_BREAK_MIN && !time_indulgent) {
 317                 double remaining = stop->worst.time - elapsed;
 318                 double pps = ((double)played) / elapsed;
 319                 double estplayouts = remaining * pps + PLAYOUT_DELTA_SAFEMARGIN;
 320                 if (best->u.playouts > best2->u.playouts + estplayouts) {
 321                         if (UDEBUGL(2))
 322                                 fprintf(stderr, "Early stop, result cannot change: "
 323                                         "best %d, best2 %d, estimated %f simulations to go (%d/%f=%f pps)\n",
 324                                         best->u.playouts, best2->u.playouts, estplayouts, played, elapsed, pps);
 325                         return true;
 326                 }
 327         }
 328
 329         /* Early break in won situation. */
 330         if (best->u.playouts >= PLAYOUT_EARLY_BREAK_MIN
 331             && (ti->dim != TD_WALLTIME || elapsed > TIME_EARLY_BREAK_MIN)
 332             && tree_node_get_value(t, 1, best->u.value) >= u->sure_win_threshold) {
 333                 return true;
 334         }
 335
 336         return false;
 337 }
 338
 339 /* Determine whether we should terminate the search later than expected. */
 340 static bool
 341 uct_search_keep_looking(struct uct *u, struct tree *t, struct board *b,
 342                 struct time_info *ti, struct time_stop *stop,
 343                 struct tree_node *best, struct tree_node *best2,
 344                 struct tree_node *bestr, struct tree_node *winner, int i)
 345 {
 346         if (!best) {
 347                 if (UDEBUGL(2))
 348                         fprintf(stderr, "Did not find best move, still trying...\n");
 349                 return true;
 350         }
 351
 352         /* Do not waste time if we are winning. Spend up to worst time if
 353          * we are unsure, but only desired time if we are sure of winning. */
 354         floating_t beta = 2 * (tree_node_get_value(t, 1, best->u.value) - 0.5);
 355         if (ti->dim == TD_WALLTIME && beta > 0) {
 356                 double good_enough = stop->desired.time * beta + stop->worst.time * (1 - beta);
 357                 double elapsed = time_now() - ti->len.t.timer_start;
 358                 if (elapsed > good_enough) return false;
 359         }
 360
 361         if (u->best2_ratio > 0) {
 362                 /* Check best/best2 simulations ratio. If the
 363                  * two best moves give very similar results,
 364                  * keep simulating. */
 365                 if (best2 && best2->u.playouts
 366                     && (double)best->u.playouts / best2->u.playouts < u->best2_ratio) {
 367                         if (UDEBUGL(2))
 368                                 fprintf(stderr, "Best2 ratio %f < threshold %f\n",
 369                                         (double)best->u.playouts / best2->u.playouts,
 370                                         u->best2_ratio);
 371                         return true;
 372                 }
 373         }
 374
 375         if (u->bestr_ratio > 0) {
 376                 /* Check best, best_best value difference. If the best move
 377                  * and its best child do not give similar enough results,
 378                  * keep simulating. */
 379                 if (bestr && bestr->u.playouts
 380                     && fabs((double)best->u.value - bestr->u.value) > u->bestr_ratio) {
 381                         if (UDEBUGL(2))
 382                                 fprintf(stderr, "Bestr delta %f > threshold %f\n",
 383                                         fabs((double)best->u.value - bestr->u.value),
 384                                         u->bestr_ratio);
 385                         return true;
 386                 }
 387         }
 388
 389         if (winner && winner != best) {
 390                 /* Keep simulating if best explored
 391                  * does not have also highest value. */
 392                 if (UDEBUGL(2))
 393                         fprintf(stderr, "[%d] best %3s [%d] %f != winner %3s [%d] %f\n", i,
 394                                 coord2sstr(node_coord(best), t->board),
 395                                 best->u.playouts, tree_node_get_value(t, 1, best->u.value),
 396                                 coord2sstr(node_coord(winner), t->board),
 397                                 winner->u.playouts, tree_node_get_value(t, 1, winner->u.value));
 398                 return true;
 399         }
 400
 401         /* No reason to keep simulating, bye. */
 402         return false;
 403 }
 404
 405 bool
 406 uct_search_check_stop(struct uct *u, struct board *b, enum stone color,
 407                       struct tree *t, struct time_info *ti,
 408                       struct uct_search_state *s, int i)
 409 {
 410         struct uct_thread_ctx *ctx = s->ctx;
 411
 412         /* Never consider stopping if we played too few simulations.
 413          * Maybe we risk losing on time when playing in super-extreme
 414          * time pressure but the tree is going to be just too messed
 415          * up otherwise - we might even play invalid suicides or pass
 416          * when we mustn't. */
 417         assert(!(ti->dim == TD_GAMES && ti->len.games < GJ_MINGAMES));
 418         if (i < GJ_MINGAMES)
 419                 return false;
 420
 421         struct tree_node *best = NULL;
 422         struct tree_node *best2 = NULL; // Second-best move.
 423         struct tree_node *bestr = NULL; // best's best child.
 424         struct tree_node *winner = NULL;
 425
 426         best = u->policy->choose(u->policy, ctx->t->root, b, color, resign);
 427         if (best) best2 = u->policy->choose(u->policy, ctx->t->root, b, color, node_coord(best));
 428
 429         /* Possibly stop search early if it's no use to try on. */
 430         int played = u->played_all + i - s->base_playouts;
 431         if (best && uct_search_stop_early(u, ctx->t, b, ti, &s->stop, best, best2, played, s->fullmem))
 432                 return true;
 433
 434         /* Check against time settings. */
 435         bool desired_done;
 436         if (ti->dim == TD_WALLTIME) {
 437                 double elapsed = time_now() - ti->len.t.timer_start;
 438                 if (elapsed > s->stop.worst.time) return true;
 439                 desired_done = elapsed > s->stop.desired.time;
 440
 441         } else { assert(ti->dim == TD_GAMES);
 442                 if (i > s->stop.worst.playouts) return true;
 443                 desired_done = i > s->stop.desired.playouts;
 444         }
 445
 446         /* We want to stop simulating, but are willing to keep trying
 447          * if we aren't completely sure about the winner yet. */
 448         if (desired_done) {
 449                 if (u->policy->winner && u->policy->evaluate) {
 450                         struct uct_descent descent = { .node = ctx->t->root };
 451                         u->policy->winner(u->policy, ctx->t, &descent);
 452                         winner = descent.node;
 453                 }
 454                 if (best)
 455                         bestr = u->policy->choose(u->policy, best, b, stone_other(color), resign);
 456                 if (!uct_search_keep_looking(u, ctx->t, b, ti, &s->stop, best, best2, bestr, winner, i))
 457                         return true;
 458         }
 459
 460         /* TODO: Early break if best->variance goes under threshold
 461          * and we already have enough playouts (possibly thanks to tbook
 462          * or to pondering)? */
 463         return false;
 464 }
 465
 466
 467 struct tree_node *
 468 uct_search_result(struct uct *u, struct board *b, enum stone color,
 469                   bool pass_all_alive, int played_games, int base_playouts,
 470                   coord_t *best_coord)
 471 {
 472         /* Choose the best move from the tree. */
 473         struct tree_node *best = u->policy->choose(u->policy, u->t->root, b, color, resign);
 474         if (!best) {
 475                 *best_coord = pass;
 476                 return NULL;
 477         }
 478         *best_coord = node_coord(best);
 479         if (UDEBUGL(1))
 480                 fprintf(stderr, "*** WINNER is %s (%d,%d) with score %1.4f (%d/%d:%d/%d games), extra komi %f\n",
 481                         coord2sstr(node_coord(best), b), coord_x(node_coord(best), b), coord_y(node_coord(best), b),
 482                         tree_node_get_value(u->t, 1, best->u.value), best->u.playouts,
 483                         u->t->root->u.playouts, u->t->root->u.playouts - base_playouts, played_games,
 484                         u->t->extra_komi);
 485
 486         /* Do not resign if we're so short of time that evaluation of best
 487          * move is completely unreliable, we might be winning actually.
 488          * In this case best is almost random but still better than resign.
 489          * Also do not resign if we are getting bad results while actually
 490          * giving away extra komi points (dynkomi). */
 491         if (tree_node_get_value(u->t, 1, best->u.value) < u->resign_threshold
 492             && !is_pass(node_coord(best)) && best->u.playouts > GJ_MINGAMES
 493             && (!u->t->use_extra_komi || komi_by_color(u->t->extra_komi, color) < 0.5)) {
 494                 *best_coord = resign;
 495                 return NULL;
 496         }
 497
 498         /* If the opponent just passed and we win counting, always
 499          * pass as well. For option stones_only, we pass only when there
 500          * there is nothing else to do, to show how to maximize score. */
 501         if (b->moves > 1 && is_pass(b->last_move.coord) && b->rules != RULES_STONES_ONLY) {
 502                 if (uct_pass_is_safe(u, b, color, pass_all_alive)) {
 503                         if (UDEBUGL(0))
 504                                 fprintf(stderr, "<Will rather pass, looks safe enough; score %f>\n",
 505                                         board_official_score(b, NULL) / 2);
 506                         *best_coord = pass;
 507                         best = u->t->root->children; // pass is the first child
 508                         assert(is_pass(node_coord(best)));
 509                         return best;
 510                 } else {
 511                         if (UDEBUGL(3))
 512                                 fprintf(stderr, "Refusing to pass, unsafe; pass_all_alive %d, ownermap #playouts %d, raw score %f\n",
 513                                         pass_all_alive, u->ownermap.playouts,
 514                                         board_official_score(b, NULL) / 2);
 515                 }
 516         }
 517
 518         return best;
 519 }