uct/search.c

   1 #include <assert.h>
   2 #include <math.h>
   3 #include <pthread.h>
   4 #include <signal.h>
   5 #include <stdio.h>
   6 #include <stdlib.h>
   7 #include <string.h>
   8 #include <time.h>
   9
  10 #define DEBUG
  11
  12 #include "debug.h"
  13 #include "distributed/distributed.h"
  14 #include "move.h"
  15 #include "random.h"
  16 #include "timeinfo.h"
  17 #include "uct/dynkomi.h"
  18 #include "uct/internal.h"
  19 #include "uct/search.h"
  20 #include "uct/tree.h"
  21 #include "uct/uct.h"
  22 #include "uct/walk.h"
  23
  24
  25 /* Default number of simulations to perform per move.
  26  * Note that this is now the total over all threads! */
  27 #define MC_GAMES        80000
  28 static const struct time_info default_ti = { /* substituted for a TT_NULL time_info in uct_search_start() */
  29         .period = TT_MOVE,
  30         .dim = TD_GAMES,
  31         .len = { .games = MC_GAMES },
  32 };
  33
  34 /* Safety margin (in playouts) added to the estimate of playouts still to
  35  * come when uct_search_stop_early() decides whether the second-best move
  36  * could still catch up with the best one. */
  37 #define PLAYOUT_DELTA_SAFEMARGIN 1000
  38
  39 /* Minimal number of simulations before an early break is considered. */
  40 #define PLAYOUT_EARLY_BREAK_MIN 5000
  41
  42
  43 /* Pachi threading structure:
  44  *
  45  * main thread
  46  *   |         main(), GTP communication, ...
  47  *   |         starts and stops the search managed by thread_manager
  48  *   |
  49  * thread_manager
  50  *   |         spawns and collects worker threads
  51  *   |
  52  * worker0
  53  * worker1
  54  * ...
  55  * workerK
  56  *             uct_playouts() loop, doing descend-playout until uct_halt
  57  *
  58  * Another way to look at it is by functions (lines denote thread boundaries):
  59  *
  60  * | uct_genmove()
  61  * | uct_search()            (uct_search_start() .. uct_search_stop())
  62  * | -----------------------
  63  * | spawn_thread_manager()
  64  * | -----------------------
  65  * | spawn_worker()
  66  * V uct_playouts() */
  67
  68 /* Set in thread manager in case the workers should stop. */
  69 volatile sig_atomic_t uct_halt = 0;
  70 /* ID of the thread manager. */
  71 static pthread_t thread_manager;
  72 bool thread_manager_running; /* true from uct_search_start() until uct_search_stop() */
  73
  74 static pthread_mutex_t finish_mutex = PTHREAD_MUTEX_INITIALIZER; /* taken around writes to finish_thread */
  75 static pthread_cond_t finish_cond = PTHREAD_COND_INITIALIZER; /* signalled after finish_thread is written */
  76 static volatile int finish_thread; /* tid of the worker that just finished, or -1 for a stop request */
  77 static pthread_mutex_t finish_serializer = PTHREAD_MUTEX_INITIALIZER; /* locked by a finishing worker, unlocked by the manager after joining it */
  78
  79 /* Worker thread body: runs playouts, then reports its exit to the manager. */
  80 static void *
  81 spawn_worker(void *ctx_)
  82 {
  83         struct uct_thread_ctx *self = ctx_;
  84         fast_srandom(self->seed);
  85         self->games = uct_playouts(self->u, self->b, self->color, self->t, self->ti);
  86         /* Announce our exit. finish_serializer is left locked on purpose:
  87          * the thread manager unlocks it once it has joined this worker. */
  88         pthread_mutex_lock(&finish_serializer);
  89         pthread_mutex_lock(&finish_mutex);
  90         finish_thread = self->tid;
  91         pthread_cond_signal(&finish_cond);
  92         pthread_mutex_unlock(&finish_mutex);
  93         return self;
  94 }
  95
  96 /* Thread manager, controlling worker threads. It must be called with
  97  * finish_mutex lock held, but it will unlock it itself before exiting;
  98  * this is necessary to be completely deadlock-free.
  99  * The finish_cond can be signalled for it to stop; in that case,
 100  * the caller should set finish_thread = -1.
 101  * It sets mctx->t to the tree used for the search and, on return,
 102  * mctx->games to the number of performed simulations. It aborts the
 103  * program if a worker thread cannot be created. */
 104 static void *
 105 spawn_thread_manager(void *ctx_)
 106 {
 107         struct uct_thread_ctx *mctx = ctx_; /* only some ctx fields are used here */
 108         struct uct *u = mctx->u;
 109         struct tree *t = mctx->t;
 110         fast_srandom(mctx->seed);
 111
 112         enum { FINISH_NONE = -2 }; /* finish_thread value: no event pending */
 113         int played_games = 0, joined = 0;
 114         pthread_t threads[u->threads];
 115         uct_halt = 0;
 116         finish_thread = FINISH_NONE; /* forget the last event of an earlier search */
 117
 118         /* Garbage collect the tree by preference when pondering. */
 119         if (u->pondering && t->nodes && t->nodes_size >= t->pruning_threshold)
 120                 t->root = tree_garbage_collect(t, t->root);
 121
 122         /* Spawn threads... */
 123         for (int ti = 0; ti < u->threads; ti++) {
 124                 struct uct_thread_ctx *ctx = malloc2(sizeof(*ctx));
 125                 ctx->u = u; ctx->b = mctx->b; ctx->color = mctx->color;
 126                 mctx->t = ctx->t = t; ctx->ti = mctx->ti;
 127                 ctx->tid = ti; ctx->seed = fast_random(65536) + ti;
 128                 int err = pthread_create(&threads[ti], NULL, spawn_worker, ctx);
 129                 if (err) { /* threads[ti] is unusable and the collect loop would never end */
 130                         fprintf(stderr, "uct: cannot spawn worker %d: %s\n", ti, strerror(err));
 131                         abort();
 132                 }
 133                 if (UDEBUGL(3))
 134                         fprintf(stderr, "Spawned worker %d\n", ti);
 135         }
 136
 137         /* ...and collect them back: */
 138         while (joined < u->threads) {
 139                 /* Wait for an event; loop since the wait may wake spuriously. */
 140                 while (finish_thread == FINISH_NONE)
 141                         pthread_cond_wait(&finish_cond, &finish_mutex);
 142                 int finished = finish_thread; finish_thread = FINISH_NONE;
 143                 if (finished < 0) {
 144                         /* Stop-by-caller. Tell the workers to wrap up and
 145                          * only now unblock their termination sequence, so
 146                          * their wake and the stop wake cannot get coalesced. */
 147                         uct_halt = 1;
 148                         pthread_mutex_unlock(&finish_serializer);
 149                         continue;
 150                 }
 151                 /* ...and gather its remnants. */
 152                 struct uct_thread_ctx *ctx;
 153                 pthread_join(threads[finished], (void **) &ctx);
 154                 played_games += ctx->games;
 155                 joined++;
 156                 free(ctx);
 157                 if (UDEBUGL(3))
 158                         fprintf(stderr, "Joined worker %d\n", finished);
 159                 pthread_mutex_unlock(&finish_serializer);
 160         }
 161
 162         pthread_mutex_unlock(&finish_mutex);
 163
 164         mctx->games = played_games;
 165         return mctx;
 166 }
 167
 168
 169 /*** THREAD MANAGER end */
 170
 171 /*** Search infrastructure: */
 172 /* Number of playouts recorded at the root of the tree being searched. */
 173 int
 174 uct_search_games(struct uct_search_state *s)
 175 {
 176         struct tree *searched = s->ctx->t;
 177         return searched->root->u.playouts;
 178 }
 179
 180 /* Start searching tree t for a move of color on b: set up *s (ti may be
 181  * NULL) and launch the thread manager. Returns with finish_serializer and
 182  * finish_mutex locked; pair with uct_search_stop(). Aborts if it can't launch. */
 183 void
 184 uct_search_start(struct uct *u, struct board *b, enum stone color,
 185                  struct tree *t, struct time_info *ti,
 186                  struct uct_search_state *s)
 187 {
 188         s->base_playouts = s->last_dynkomi = s->last_print = t->root->u.playouts;
 189         s->print_interval = u->reportfreq * u->threads;
 190         s->fullmem = false;
 191         if (ti && ti->period == TT_NULL) *ti = default_ti; /* no limits given: use the default */
 192         if (ti) time_stop_conditions(ti, b, u->fuseki_end, u->yose_start, u->max_maintime_ratio, &s->stop);
 193         assert(u->threads > 0);
 194         assert(!thread_manager_running);
 195         static struct uct_thread_ctx mctx; /* static: the manager keeps using it after we return */
 196         mctx = (struct uct_thread_ctx) { .u = u, .b = b, .color = color, .t = t, .seed = fast_random(65536), .ti = ti };
 197         s->ctx = &mctx;
 198         pthread_mutex_lock(&finish_serializer); /* parks finishing workers until a stop is requested */
 199         pthread_mutex_lock(&finish_mutex); /* spawn_thread_manager() expects this held */
 200         int err = pthread_create(&thread_manager, NULL, spawn_thread_manager, s->ctx);
 201         if (err) { /* thread_manager is unusable; uct_search_stop() must never join it */
 202                 fprintf(stderr, "uct: cannot start thread manager: %s\n", strerror(err));
 203                 abort();
 204         }
 205         thread_manager_running = true;
 206 }
 207
 208 /* Stop the search begun by uct_search_start(): request the stop, wait for
 209  * the thread manager to exit and return the context it handed back. */
 210 struct uct_thread_ctx *
 211 uct_search_stop(void)
 212 {
 213         struct uct_thread_ctx *manager_ctx;
 214         assert(thread_manager_running);
 215
 216         /* Post the stop request (finish_thread = -1) to the manager. */
 217         pthread_mutex_lock(&finish_mutex);
 218         finish_thread = -1;
 219         pthread_cond_signal(&finish_cond);
 220         pthread_mutex_unlock(&finish_mutex);
 221         thread_manager_running = false;
 222         pthread_join(thread_manager, (void **) &manager_ctx);
 223         return manager_ctx;
 224 }
 225
 226
 227 /* Periodic housekeeping after i playouts: dynkomi, progress report, memory check. */
 228 void
 229 uct_search_progress(struct uct *u, struct board *b, enum stone color,
 230                     struct tree *t, struct time_info *ti,
 231                     struct uct_search_state *s, int i)
 232 {
 233         struct tree *tree = s->ctx->t;
 234
 235         /* Adjust dynkomi once more than komi_step playouts passed? */
 236         int komi_step = u->dynkomi_interval * u->threads;
 237         bool komi_due = tree->use_extra_komi && u->dynkomi->permove
 238                         && !u->pondering && komi_step
 239                         && i > s->last_dynkomi + komi_step;
 240         if (komi_due) {
 241                 floating_t before = tree->extra_komi;
 242                 s->last_dynkomi += komi_step;
 243                 tree->extra_komi = u->dynkomi->permove(u->dynkomi, b, tree);
 244                 if (UDEBUGL(3) && before != tree->extra_komi)
 245                         fprintf(stderr, "dynkomi adjusted (%f -> %f)\n", before, tree->extra_komi);
 246         }
 247
 248         /* Print progress once more than print_interval playouts passed? */
 249         if (i - s->last_print > s->print_interval) {
 250                 s->last_print += s->print_interval; // keep the numbers tidy
 251                 uct_progress_status(u, tree, color, s->last_print, false);
 252         }
 253
 254         /* Note (once) when the tree outgrows max_tree_size. */
 255         if (s->fullmem || tree->nodes_size <= u->max_tree_size) return;
 256         if (UDEBUGL(2))
 257                 fprintf(stderr, "memory limit hit (%lu > %lu)\n", tree->nodes_size, u->max_tree_size);
 258         s->fullmem = true;
 259 }
 260
 261
 262 /* Determine whether we should terminate the search early: the tree
 263  * memory is full, the second-best move can no longer catch up in the
 264  * time left, or the best move already looks like a sure win. */
 265 static bool
 266 uct_search_stop_early(struct uct *u, struct tree *t, struct board *b,
 267                 struct time_info *ti, struct time_stop *stop,
 268                 struct tree_node *best, struct tree_node *best2,
 269                 int played, bool fullmem)
 270 {
 271         /* Out of tree memory: give up right away. With the tree unable
 272          * to grow, poorly expanded nodes would quickly take over with
 273          * extremely high ratios, since their counters are no longer
 274          * properly simulated (just as with non-UCT MonteCarlo).
 275          * (XXX: A proper solution would be to prune the tree
 276          * on the spot.) */
 277         if (fullmem)
 278                 return true;
 279
 280         /* On a wall-clock limit, think for at least TREE_BUSYWAIT_INTERVAL
 281          * to avoid a random move. This is particularly important in
 282          * distributed mode, where this function is called frequently. */
 283         bool on_clock = (ti->dim == TD_WALLTIME);
 284         double elapsed = on_clock ? time_now() - ti->len.t.timer_start : 0.0;
 285         if (on_clock && elapsed < TREE_BUSYWAIT_INTERVAL)
 286                 return false;
 287
 288         /* Break early if we estimate the second-best move cannot catch
 289          * up in the assigned time anymore. In byoyomi with a single
 290          * stone left in our period we use all our time, however -
 291          * it's better to pre-ponder. */
 292         bool time_indulgent = (!ti->len.t.main_time && ti->len.t.byoyomi_stones == 1);
 293         if (best2 && on_clock
 294             && played >= PLAYOUT_EARLY_BREAK_MIN && !time_indulgent) {
 295                 /* Project the playout rate so far over the time left. */
 296                 double remaining = stop->worst.time - elapsed;
 297                 double pps = ((double)played) / elapsed;
 298                 double estplayouts = remaining * pps + PLAYOUT_DELTA_SAFEMARGIN;
 299                 if (best->u.playouts > best2->u.playouts + estplayouts) {
 300                         if (UDEBUGL(2))
 301                                 fprintf(stderr, "Early stop, result cannot change: "
 302                                         "best %d, best2 %d, estimated %f simulations to go (%d/%f=%f pps)\n",
 303                                         best->u.playouts, best2->u.playouts, estplayouts, played, elapsed, pps);
 304                         return true;
 305                 }
 306         }
 307
 308         /* Early break in won situation: best is explored well enough and
 309          * its value reaches sure_win_threshold. */
 310         bool explored = best->u.playouts >= PLAYOUT_EARLY_BREAK_MIN;
 311         return explored
 312                && tree_node_get_value(t, 1, best->u.value) >= u->sure_win_threshold;
 313 }
 314
 315 /* Determine whether we should terminate the search later than expected.
 316  * Called once the desired limit has passed; returns true as long as one
 317  * of the checks below still argues for more simulations. i is only used
 318  * in the debug output. */
 319 static bool
 320 uct_search_keep_looking(struct uct *u, struct tree *t, struct board *b,
 321                 struct time_info *ti, struct time_stop *stop,
 322                 struct tree_node *best, struct tree_node *best2,
 323                 struct tree_node *bestr, struct tree_node *winner, int i)
 324 {
 325         if (!best) {
 326                 if (UDEBUGL(2))
 327                         fprintf(stderr, "Did not find best move, still trying...\n");
 328                 return true;
 329         }
 330
 331         /* Do not waste time if we are winning: the further the value of
 332          * best lies above 0.5, the closer the time allowance moves from
 333          * the worst time (unsure) towards the desired time (sure of
 334          * winning). */
 335         floating_t beta = 2 * (tree_node_get_value(t, 1, best->u.value) - 0.5);
 336         if (ti->dim == TD_WALLTIME && beta > 0) {
 337                 double good_enough = stop->desired.time * beta + stop->worst.time * (1 - beta);
 338                 double elapsed = time_now() - ti->len.t.timer_start;
 339                 if (elapsed > good_enough)
 340                         return false;
 341         }
 342
 343         /* Best/best2 simulations ratio (only if best2_ratio is set):
 344          * if the two best moves were explored about equally often,
 345          * keep simulating. */
 346         if (u->best2_ratio > 0 && best2 && best2->u.playouts
 347             && (double)best->u.playouts / best2->u.playouts < u->best2_ratio) {
 348                 if (UDEBUGL(2))
 349                         fprintf(stderr, "Best2 ratio %f < threshold %f\n",
 350                                 (double)best->u.playouts / best2->u.playouts,
 351                                 u->best2_ratio);
 352                 return true;
 353         }
 354
 355         /* Best vs. its best child (only if bestr_ratio is set): if their
 356          * values differ by more than bestr_ratio, keep simulating. */
 357         if (u->bestr_ratio > 0 && bestr && bestr->u.playouts
 358             && fabs((double)best->u.value - bestr->u.value) > u->bestr_ratio) {
 359                 if (UDEBUGL(2))
 360                         fprintf(stderr, "Bestr delta %f > threshold %f\n",
 361                                 fabs((double)best->u.value - bestr->u.value),
 362                                 u->bestr_ratio);
 363                 return true;
 364         }
 365
 366         /* No reason to keep simulating if there is no separate winner. */
 367         if (!winner || winner == best)
 368                 return false;
 369
 370         /* Keep simulating: the best explored move does not also have
 371          * the highest value. */
 372         if (UDEBUGL(2))
 373                 fprintf(stderr, "[%d] best %3s [%d] %f != winner %3s [%d] %f\n", i,
 374                         coord2sstr(best->coord, t->board),
 375                         best->u.playouts, tree_node_get_value(t, 1, best->u.value),
 376                         coord2sstr(winner->coord, t->board),
 377                         winner->u.playouts, tree_node_get_value(t, 1, winner->u.value));
 378         return true;
 379 }
 380
 381 /* Decide whether the search should stop after i playouts (hard limit hit,
 382  * early stop possible, or desired limit passed with no reason to go on). */
 383 bool
 384 uct_search_check_stop(struct uct *u, struct board *b, enum stone color,
 385                       struct tree *t, struct time_info *ti,
 386                       struct uct_search_state *s, int i)
 387 {
 388         struct tree *tree = s->ctx->t;
 389
 390         /* Never consider stopping if we played too few simulations.
 391          * Maybe we risk losing on time when playing in super-extreme
 392          * time pressure but the tree is going to be just too messed
 393          * up otherwise - we might even play invalid suicides or pass
 394          * when we mustn't. */
 395         assert(!(ti->dim == TD_GAMES && ti->len.games < GJ_MINGAMES));
 396         if (i < GJ_MINGAMES)
 397                 return false;
 398
 399         struct tree_node *best = u->policy->choose(u->policy, tree->root, b, color, resign);
 400         struct tree_node *best2 = NULL; // Second-best move.
 401         if (best)
 402                 best2 = u->policy->choose(u->policy, tree->root, b, color, best->coord);
 403
 404         /* Possibly stop search early if it's no use to try on. */
 405         int played = u->played_all + i - s->base_playouts;
 406         if (best && uct_search_stop_early(u, tree, b, ti, &s->stop, best, best2, played, s->fullmem))
 407                 return true;
 408
 409         /* Check against time settings. */
 410         bool worst_done, desired_done;
 411         if (ti->dim == TD_WALLTIME) {
 412                 double elapsed = time_now() - ti->len.t.timer_start;
 413                 worst_done = elapsed > s->stop.worst.time;
 414                 desired_done = elapsed > s->stop.desired.time;
 415         } else {
 416                 assert(ti->dim == TD_GAMES);
 417                 worst_done = i > s->stop.worst.playouts;
 418                 desired_done = i > s->stop.desired.playouts;
 419         }
 420         if (worst_done)
 421                 return true;
 422         if (!desired_done)
 423                 return false;
 424
 425         /* We want to stop simulating, but are willing to keep trying
 426          * if we aren't completely sure about the winner yet. */
 427         struct tree_node *winner = NULL;
 428         if (u->policy->winner && u->policy->evaluate) {
 429                 struct uct_descent descent = { .node = tree->root };
 430                 u->policy->winner(u->policy, tree, &descent);
 431                 winner = descent.node;
 432         }
 433         struct tree_node *bestr = NULL; // best's best child.
 434         if (best)
 435                 bestr = u->policy->choose(u->policy, best, b, stone_other(color), resign);
 436         /* TODO: Early break if best->variance goes under threshold
 437          * and we already have enough playouts (possibly thanks to tbook
 438          * or to pondering)? */
 439         return !uct_search_keep_looking(u, tree, b, ti, &s->stop, best, best2, bestr, winner, i);
 440 }
 441
 442
 443 /* Pick the move to play after the search: returns the chosen node (NULL if
 444  * there is none or we resign) and stores its coordinate, pass or resign
 445  * in *best_coord. */
 446 struct tree_node *
 447 uct_search_result(struct uct *u, struct board *b, enum stone color,
 448                   bool pass_all_alive, int played_games, int base_playouts,
 449                   coord_t *best_coord)
 450 {
 451         struct tree_node *best = u->policy->choose(u->policy, u->t->root, b, color, resign);
 452         if (!best) {
 453                 *best_coord = pass;
 454                 return NULL;
 455         }
 456         *best_coord = best->coord;
 457         if (UDEBUGL(1))
 458                 fprintf(stderr, "*** WINNER is %s (%d,%d) with score %1.4f (%d/%d:%d/%d games), extra komi %f\n",
 459                         coord2sstr(best->coord, b), coord_x(best->coord, b), coord_y(best->coord, b),
 460                         tree_node_get_value(u->t, 1, best->u.value), best->u.playouts,
 461                         u->t->root->u.playouts, u->t->root->u.playouts - base_playouts, played_games,
 462                         u->t->extra_komi);
 463
 464         /* Do not resign if we're so short of time that evaluation of best
 465          * move is completely unreliable, we might be winning actually.
 466          * In this case best is almost random but still better than resign.
 467          * Also do not resign if we are getting bad results while actually
 468          * giving away extra komi points (dynkomi). */
 469         bool hopeless = tree_node_get_value(u->t, 1, best->u.value) < u->resign_threshold
 470                 && !is_pass(best->coord) && best->u.playouts > GJ_MINGAMES
 471                 && (!u->t->use_extra_komi || komi_by_color(u->t->extra_komi, color) < 0.5);
 472         if (hopeless) {
 473                 *best_coord = resign;
 474                 return NULL;
 475         }
 476
 477         /* If the opponent just passed and we win counting, always pass
 478          * as well; make sure enough playouts are simulated first. */
 479         if (b->moves <= 1 || !is_pass(b->last_move.coord))
 480                 return best;
 481         while (u->ownermap.playouts < GJ_MINGAMES)
 482                 uct_playout(u, b, color, u->t);
 483         if (!uct_pass_is_safe(u, b, color, u->pass_all_alive || pass_all_alive))
 484                 return best;
 485         if (UDEBUGL(0))
 486                 fprintf(stderr, "<Will rather pass, looks safe enough; score %f>\n",
 487                         board_official_score(b, NULL) / 2);
 488         *best_coord = pass;
 489         best = u->t->root->children; // pass is the first child
 490         assert(is_pass(best->coord));
 491         return best;
 492 }