uct/search.c

   1 #include <assert.h>
   2 #include <math.h>
   3 #include <pthread.h>
   4 #include <signal.h>
   5 #include <stdio.h>
   6 #include <stdlib.h>
   7 #include <string.h>
   8 #include <time.h>
   9
  10 #define DEBUG
  11
  12 #include "debug.h"
  13 #include "distributed/distributed.h"
  14 #include "move.h"
  15 #include "random.h"
  16 #include "timeinfo.h"
  17 #include "uct/dynkomi.h"
  18 #include "uct/internal.h"
  19 #include "uct/search.h"
  20 #include "uct/tree.h"
  21 #include "uct/uct.h"
  22 #include "uct/walk.h"
  23
  24
  25 /* Default number of simulations to perform per move.
  26  * Note that this is now in total over all threads! (Unless TM_ROOT.) */
  27 #define MC_GAMES        80000
  28 static const struct time_info default_ti = {
  29         .period = TT_MOVE,
  30         .dim = TD_GAMES,
  31         .len = { .games = MC_GAMES },
  32 };
  33
  34 /* Once per how many simulations (per thread) to show a progress report line. */
  35 #define TREE_SIMPROGRESS_INTERVAL 10000
  36
  37 /* When terminating UCT search early, the safety margin to add to the
  38  * remaining playout number estimate when deciding whether the result can
  39  * still change. */
  40 #define PLAYOUT_DELTA_SAFEMARGIN 1000
  41
  42 /* Minimal number of simulations to consider early break. */
  43 #define PLAYOUT_EARLY_BREAK_MIN 2500
  44
  45
  46 /* Pachi threading structure:
  47  *
  48  * main thread
  49  *   |         main(), GTP communication, ...
  50  *   |         starts and stops the search managed by thread_manager
  51  *   |
  52  * thread_manager
  53  *   |         spawns and collects worker threads
  54  *   |
  55  * worker0
  56  * worker1
  57  * ...
  58  * workerK
  59  *             uct_playouts() loop, doing descend-playout until uct_halt
  60  *
  61  * Another way to look at it is by functions (lines denote thread boundaries):
  62  *
  63  * | uct_genmove()
  64  * | uct_search()            (uct_search_start() .. uct_search_stop())
  65  * | -----------------------
  66  * | spawn_thread_manager()
  67  * | -----------------------
  68  * | spawn_worker()
  69  * V uct_playouts() */
  70
  71 /* Set in thread manager in case the workers should stop. */
  72 volatile sig_atomic_t uct_halt = 0;
  73 /* ID of the running worker thread. */
  74 __thread int thread_id = -1;
  75 /* ID of the thread manager. */
  76 static pthread_t thread_manager;
  77 bool thread_manager_running;
  78
  79 static pthread_mutex_t finish_mutex = PTHREAD_MUTEX_INITIALIZER;
  80 static pthread_cond_t finish_cond = PTHREAD_COND_INITIALIZER;
  81 static volatile int finish_thread;
  82 static pthread_mutex_t finish_serializer = PTHREAD_MUTEX_INITIALIZER;
  83
  84 static void *
  85 spawn_worker(void *ctx_)
  86 {
  87         struct uct_thread_ctx *ctx = ctx_;
  88         /* Setup */
  89         fast_srandom(ctx->seed);
  90         thread_id = ctx->tid;
  91         /* Run */
  92         ctx->games = uct_playouts(ctx->u, ctx->b, ctx->color, ctx->t);
  93         /* Finish */
  94         pthread_mutex_lock(&finish_serializer);
  95         pthread_mutex_lock(&finish_mutex);
  96         finish_thread = ctx->tid;
  97         pthread_cond_signal(&finish_cond);
  98         pthread_mutex_unlock(&finish_mutex);
  99         return ctx;
 100 }
 101
 102 /* Thread manager, controlling worker threads. It must be called with
 103  * finish_mutex lock held, but it will unlock it itself before exiting;
 104  * this is necessary to be completely deadlock-free. */
 105 /* The finish_cond can be signalled for it to stop; in that case,
 106  * the caller should set finish_thread = -1. */
 107 /* After it is started, it will update mctx->t to point at some tree
 108  * used for the actual search (matters only for TM_ROOT), on return
 109  * it will set mctx->games to the number of performed simulations. */
 110 static void *
 111 spawn_thread_manager(void *ctx_)
 112 {
 113         /* In thread_manager, we use only some of the ctx fields. */
 114         struct uct_thread_ctx *mctx = ctx_;
 115         struct uct *u = mctx->u;
 116         struct tree *t = mctx->t;
 117         bool shared_tree = u->parallel_tree;
 118         fast_srandom(mctx->seed);
 119
 120         int played_games = 0;
 121         pthread_t threads[u->threads];
 122         int joined = 0;
 123
 124         uct_halt = 0;
 125
 126         /* Garbage collect the tree by preference when pondering. */
 127         if (u->pondering && t->nodes && t->nodes_size > t->max_tree_size/2) {
 128                 unsigned long temp_size = (MIN_FREE_MEM_PERCENT * t->max_tree_size) / 100;
 129                 t->root = tree_garbage_collect(t, temp_size, t->root);
 130         }
 131
 132         /* Spawn threads... */
 133         for (int ti = 0; ti < u->threads; ti++) {
 134                 struct uct_thread_ctx *ctx = malloc2(sizeof(*ctx));
 135                 ctx->u = u; ctx->b = mctx->b; ctx->color = mctx->color;
 136                 mctx->t = ctx->t = shared_tree ? t : tree_copy(t);
 137                 ctx->tid = ti; ctx->seed = fast_random(65536) + ti;
 138                 pthread_create(&threads[ti], NULL, spawn_worker, ctx);
 139                 if (UDEBUGL(3))
 140                         fprintf(stderr, "Spawned worker %d\n", ti);
 141         }
 142
 143         /* ...and collect them back: */
 144         while (joined < u->threads) {
 145                 /* Wait for some thread to finish... */
 146                 pthread_cond_wait(&finish_cond, &finish_mutex);
 147                 if (finish_thread < 0) {
 148                         /* Stop-by-caller. Tell the workers to wrap up. */
 149                         uct_halt = 1;
 150                         continue;
 151                 }
 152                 /* ...and gather its remnants. */
 153                 struct uct_thread_ctx *ctx;
 154                 pthread_join(threads[finish_thread], (void **) &ctx);
 155                 played_games += ctx->games;
 156                 joined++;
 157                 if (!shared_tree) {
 158                         if (ctx->t == mctx->t) mctx->t = t;
 159                         tree_merge(t, ctx->t);
 160                         tree_done(ctx->t);
 161                 }
 162                 free(ctx);
 163                 if (UDEBUGL(3))
 164                         fprintf(stderr, "Joined worker %d\n", finish_thread);
 165                 pthread_mutex_unlock(&finish_serializer);
 166         }
 167
 168         pthread_mutex_unlock(&finish_mutex);
 169
 170         if (!shared_tree)
 171                 tree_normalize(mctx->t, u->threads);
 172
 173         mctx->games = played_games;
 174         return mctx;
 175 }
 176
 177
 178 /*** THREAD MANAGER end */
 179
 180 /*** Search infrastructure: */
 181
 182
 183 int
 184 uct_search_games(struct uct_search_state *s)
 185 {
 186         return s->ctx->t->root->u.playouts;
 187 }
 188
 189 void
 190 uct_search_start(struct uct *u, struct board *b, enum stone color,
 191                  struct tree *t, struct time_info *ti,
 192                  struct uct_search_state *s)
 193 {
 194         /* Set up search state. */
 195         s->base_playouts = s->last_dynkomi = s->last_print = t->root->u.playouts;
 196         s->print_interval = TREE_SIMPROGRESS_INTERVAL * (u->thread_model == TM_ROOT ? 1 : u->threads);
 197         s->fullmem = false;
 198
 199         if (ti) {
 200                 if (ti->period == TT_NULL) *ti = default_ti;
 201                 time_stop_conditions(ti, b, u->fuseki_end, u->yose_start, &s->stop);
 202         }
 203
 204         /* Fire up the tree search thread manager, which will in turn
 205          * spawn the searching threads. */
 206         assert(u->threads > 0);
 207         assert(!thread_manager_running);
 208         static struct uct_thread_ctx mctx;
 209         mctx = (struct uct_thread_ctx) { .u = u, .b = b, .color = color, .t = t, .seed = fast_random(65536) };
 210         s->ctx = &mctx;
 211         pthread_mutex_lock(&finish_mutex);
 212         pthread_create(&thread_manager, NULL, spawn_thread_manager, s->ctx);
 213         thread_manager_running = true;
 214 }
 215
 216 struct uct_thread_ctx *
 217 uct_search_stop(void)
 218 {
 219         assert(thread_manager_running);
 220
 221         /* Signal thread manager to stop the workers. */
 222         pthread_mutex_lock(&finish_mutex);
 223         finish_thread = -1;
 224         pthread_cond_signal(&finish_cond);
 225         pthread_mutex_unlock(&finish_mutex);
 226
 227         /* Collect the thread manager. */
 228         struct uct_thread_ctx *pctx;
 229         thread_manager_running = false;
 230         pthread_join(thread_manager, (void **) &pctx);
 231         return pctx;
 232 }
 233
 234
 235 void
 236 uct_search_progress(struct uct *u, struct board *b, enum stone color,
 237                     struct tree *t, struct time_info *ti,
 238                     struct uct_search_state *s, int i)
 239 {
 240         struct uct_thread_ctx *ctx = s->ctx;
 241
 242         /* Adjust dynkomi? */
 243         if (ctx->t->use_extra_komi && u->dynkomi->permove
 244             && !u->pondering && u->dynkomi_interval
 245             && i > s->last_dynkomi + u->dynkomi_interval) {
 246                 s->last_dynkomi += u->dynkomi_interval;
 247                 float old_dynkomi = ctx->t->extra_komi;
 248                 ctx->t->extra_komi = u->dynkomi->permove(u->dynkomi, b, ctx->t);
 249                 if (UDEBUGL(3) && old_dynkomi != ctx->t->extra_komi)
 250                         fprintf(stderr, "dynkomi adjusted (%f -> %f)\n",
 251                                 old_dynkomi, ctx->t->extra_komi);
 252         }
 253
 254         /* Print progress? */
 255         if (i - s->last_print > s->print_interval) {
 256                 s->last_print += s->print_interval; // keep the numbers tidy
 257                 uct_progress_status(u, ctx->t, color, s->last_print);
 258         }
 259
 260         if (!s->fullmem && ctx->t->nodes_size > u->max_tree_size) {
 261                 if (UDEBUGL(2))
 262                         fprintf(stderr, "memory limit hit (%lu > %lu)\n",
 263                                 ctx->t->nodes_size, u->max_tree_size);
 264                 s->fullmem = true;
 265         }
 266 }
 267
 268
 269 /* Determine whether we should terminate the search early. */
 270 static bool
 271 uct_search_stop_early(struct uct *u, struct tree *t, struct board *b,
 272                 struct time_info *ti, struct time_stop *stop,
 273                 struct tree_node *best, struct tree_node *best2,
 274                 int played)
 275 {
 276         /* Break early if we estimate the second-best move cannot
 277          * catch up in assigned time anymore. We use all our time
 278          * if we are in byoyomi with single stone remaining in our
 279          * period, however - it's better to pre-ponder. */
 280         bool time_indulgent = (!ti->len.t.main_time && ti->len.t.byoyomi_stones == 1);
 281         if (best2 && ti->dim == TD_WALLTIME && !time_indulgent) {
 282                 double elapsed = time_now() - ti->len.t.timer_start;
 283                 double remaining = stop->worst.time - elapsed;
 284                 double pps = ((double)played) / elapsed;
 285                 double estplayouts = remaining * pps + PLAYOUT_DELTA_SAFEMARGIN;
 286                 if (best->u.playouts > best2->u.playouts + estplayouts) {
 287                         if (UDEBUGL(2))
 288                                 fprintf(stderr, "Early stop, result cannot change: "
 289                                         "best %d, best2 %d, estimated %f simulations to go\n",
 290                                         best->u.playouts, best2->u.playouts, estplayouts);
 291                         return true;
 292                 }
 293         }
 294
 295         /* Early break in won situation. */
 296         if (best->u.playouts >= PLAYOUT_EARLY_BREAK_MIN
 297             && tree_node_get_value(t, 1, best->u.value) >= u->loss_threshold) {
 298                 /* Still use at least half the desired time (top-bounded by
 299                  * a reasonably confident number of simulations). It is silly
 300                  * to lose a won game because we played a bad move in 0.1s. */
 301                 double elapsed = 0;
 302                 if (ti->dim == TD_WALLTIME) {
 303                         elapsed = time_now() - ti->len.t.timer_start;
 304                         return elapsed >= 0.5 * stop->desired.time;
 305                 } else {
 306                         return true;
 307                 }
 308         }
 309
 310         return false;
 311 }
 312
 313 /* Determine whether we should terminate the search later than expected. */
 314 static bool
 315 uct_search_keep_looking(struct uct *u, struct tree *t, struct board *b,
 316                 struct time_info *ti, struct time_stop *stop,
 317                 struct tree_node *best, struct tree_node *best2,
 318                 struct tree_node *bestr, struct tree_node *winner, int i)
 319 {
 320         if (!best) {
 321                 if (UDEBUGL(2))
 322                         fprintf(stderr, "Did not find best move, still trying...\n");
 323                 return true;
 324         }
 325
 326         /* Do not waste time if we are winning. Spend up to worst time if
 327          * we are unsure, but only desired time if we are sure of winning. */
 328         float beta = 2 * (tree_node_get_value(t, 1, best->u.value) - 0.5);
 329         if (ti->dim == TD_WALLTIME && beta > 0) {
 330                 double good_enough = stop->desired.time * beta + stop->worst.time * (1 - beta);
 331                 double elapsed = time_now() - ti->len.t.timer_start;
 332                 if (elapsed > good_enough) return false;
 333         }
 334
 335         if (u->best2_ratio > 0) {
 336                 /* Check best/best2 simulations ratio. If the
 337                  * two best moves give very similar results,
 338                  * keep simulating. */
 339                 if (best2 && best2->u.playouts
 340                     && (double)best->u.playouts / best2->u.playouts < u->best2_ratio) {
 341                         if (UDEBUGL(2))
 342                                 fprintf(stderr, "Best2 ratio %f < threshold %f\n",
 343                                         (double)best->u.playouts / best2->u.playouts,
 344                                         u->best2_ratio);
 345                         return true;
 346                 }
 347         }
 348
 349         if (u->bestr_ratio > 0) {
 350                 /* Check best, best_best value difference. If the best move
 351                  * and its best child do not give similar enough results,
 352                  * keep simulating. */
 353                 if (bestr && bestr->u.playouts
 354                     && fabs((double)best->u.value - bestr->u.value) > u->bestr_ratio) {
 355                         if (UDEBUGL(2))
 356                                 fprintf(stderr, "Bestr delta %f > threshold %f\n",
 357                                         fabs((double)best->u.value - bestr->u.value),
 358                                         u->bestr_ratio);
 359                         return true;
 360                 }
 361         }
 362
 363         if (winner && winner != best) {
 364                 /* Keep simulating if best explored
 365                  * does not have also highest value. */
 366                 if (UDEBUGL(2))
 367                         fprintf(stderr, "[%d] best %3s [%d] %f != winner %3s [%d] %f\n", i,
 368                                 coord2sstr(best->coord, t->board),
 369                                 best->u.playouts, tree_node_get_value(t, 1, best->u.value),
 370                                 coord2sstr(winner->coord, t->board),
 371                                 winner->u.playouts, tree_node_get_value(t, 1, winner->u.value));
 372                 return true;
 373         }
 374
 375         /* No reason to keep simulating, bye. */
 376         return false;
 377 }
 378
 379 bool
 380 uct_search_check_stop(struct uct *u, struct board *b, enum stone color,
 381                       struct tree *t, struct time_info *ti,
 382                       struct uct_search_state *s, int i)
 383 {
 384         struct uct_thread_ctx *ctx = s->ctx;
 385
 386         /* Never consider stopping if we played too few simulations.
 387          * Maybe we risk losing on time when playing in super-extreme
 388          * time pressure but the tree is going to be just too messed
 389          * up otherwise - we might even play invalid suicides or pass
 390          * when we mustn't. */
 391         if (i < GJ_MINGAMES)
 392                 return false;
 393
 394         struct tree_node *best = NULL;
 395         struct tree_node *best2 = NULL; // Second-best move.
 396         struct tree_node *bestr = NULL; // best's best child.
 397         struct tree_node *winner = NULL;
 398
 399         best = u->policy->choose(u->policy, ctx->t->root, b, color, resign);
 400         if (best) best2 = u->policy->choose(u->policy, ctx->t->root, b, color, best->coord);
 401
 402         /* Possibly stop search early if it's no use to try on. */
 403         int played = u->played_all + i - s->base_playouts;
 404         if (best && uct_search_stop_early(u, ctx->t, b, ti, &s->stop, best, best2, played))
 405                 return true;
 406
 407         /* Check against time settings. */
 408         bool desired_done;
 409         if (ti->dim == TD_WALLTIME) {
 410                 double elapsed = time_now() - ti->len.t.timer_start;
 411                 if (elapsed > s->stop.worst.time) return true;
 412                 desired_done = elapsed > s->stop.desired.time;
 413
 414         } else { assert(ti->dim == TD_GAMES);
 415                 if (i > s->stop.worst.playouts) return true;
 416                 desired_done = i > s->stop.desired.playouts;
 417         }
 418
 419         /* We want to stop simulating, but are willing to keep trying
 420          * if we aren't completely sure about the winner yet. */
 421         if (desired_done) {
 422                 if (u->policy->winner && u->policy->evaluate) {
 423                         struct uct_descent descent = { .node = ctx->t->root };
 424                         u->policy->winner(u->policy, ctx->t, &descent);
 425                         winner = descent.node;
 426                 }
 427                 if (best)
 428                         bestr = u->policy->choose(u->policy, best, b, stone_other(color), resign);
 429                 if (!uct_search_keep_looking(u, ctx->t, b, ti, &s->stop, best, best2, bestr, winner, i))
 430                         return true;
 431         }
 432
 433         /* TODO: Early break if best->variance goes under threshold
 434          * and we already have enough playouts (possibly thanks to book
 435          * or to pondering)? */
 436         return false;
 437 }
 438
 439
 440 struct tree_node *
 441 uct_search_result(struct uct *u, struct board *b, enum stone color,
 442                   bool pass_all_alive, int played_games, int base_playouts,
 443                   coord_t *best_coord)
 444 {
 445         /* Choose the best move from the tree. */
 446         struct tree_node *best = u->policy->choose(u->policy, u->t->root, b, color, resign);
 447         if (!best) {
 448                 *best_coord = pass;
 449                 return NULL;
 450         }
 451         *best_coord = best->coord;
 452         if (UDEBUGL(1))
 453                 fprintf(stderr, "*** WINNER is %s (%d,%d) with score %1.4f (%d/%d:%d/%d games), extra komi %f\n",
 454                         coord2sstr(best->coord, b), coord_x(best->coord, b), coord_y(best->coord, b),
 455                         tree_node_get_value(u->t, 1, best->u.value), best->u.playouts,
 456                         u->t->root->u.playouts, u->t->root->u.playouts - base_playouts, played_games,
 457                         u->t->extra_komi);
 458
 459         /* Do not resign if we're so short of time that evaluation of best
 460          * move is completely unreliable, we might be winning actually.
 461          * In this case best is almost random but still better than resign.
 462          * Also do not resign if we are getting bad results while actually
 463          * giving away extra komi points (dynkomi). */
 464         if (tree_node_get_value(u->t, 1, best->u.value) < u->resign_ratio
 465             && !is_pass(best->coord) && best->u.playouts > GJ_MINGAMES
 466             && komi_by_color(u->t->extra_komi, color) < 0.5) {
 467                 *best_coord = resign;
 468                 return NULL;
 469         }
 470
 471         /* If the opponent just passed and we win counting, always
 472          * pass as well. */
 473         if (b->moves > 1 && is_pass(b->last_move.coord)) {
 474                 /* Make sure enough playouts are simulated. */
 475                 while (u->ownermap.playouts < GJ_MINGAMES)
 476                         uct_playout(u, b, color, u->t);
 477                 if (uct_pass_is_safe(u, b, color, u->pass_all_alive || pass_all_alive)) {
 478                         if (UDEBUGL(0))
 479                                 fprintf(stderr, "<Will rather pass, looks safe enough; score %f>\n",
 480                                         board_official_score(b, NULL) / 2);
 481                         *best_coord = pass;
 482                         return NULL;
 483                 }
 484         }
 485
 486         return best;
 487 }