uct/uct.c

#include <assert.h>
#include <math.h>
#include <pthread.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <time.h>

#define DEBUG

#include "debug.h"
#include "board.h"
#include "gtp.h"
#include "move.h"
#include "mq.h"
#include "playout.h"
#include "playout/elo.h"
#include "playout/moggy.h"
#include "playout/light.h"
#include "random.h"
#include "tactics.h"
#include "uct/internal.h"
#include "uct/prior.h"
#include "uct/tree.h"
#include "uct/uct.h"
#include "uct/walk.h"
  27
/* Tree policy constructors, selected by the "policy" and "random_policy"
 * options in uct_state_init(). Their definitions are not in the visible
 * part of this file. */
struct uct_policy *policy_ucb1_init(struct uct *u, char *arg);
struct uct_policy *policy_ucb1amaf_init(struct uct *u, char *arg);
/* Forward declaration: several handlers call this before its definition
 * further down in this file. */
static void uct_pondering_stop(struct uct *u);
  31
  32
/* Default simulation budget; the initial value of u->games. */
#define MC_GAMES        80000
/* Initial value of u->gamelen. */
#define MC_GAMELEN      MAX_GAMELEN

/* What proportion of ownermap counts must belong to one color for the
 * point to be considered settled. */
#define GJ_THRES        0.8
/* Minimum number of playouts the ownermap must have recorded before
 * groups are judged. */
#define GJ_MINGAMES     500

/* How often to inspect the tree from the main thread to check for playout
 * stop, progress reports, etc. A (struct timespec) initializer. */
#define TREE_BUSYWAIT_INTERVAL { .tv_sec = 0, .tv_nsec = 100*1000000 /* 100ms */ }
  45
  46
  47 static void
  48 setup_state(struct uct *u, struct board *b, enum stone color)
  49 {
  50         u->t = tree_init(b, color);
  51         if (u->force_seed)
  52                 fast_srandom(u->force_seed);
  53         if (UDEBUGL(0))
  54                 fprintf(stderr, "Fresh board with random seed %lu\n", fast_getseed());
  55         //board_print(b, stderr);
  56         if (!u->no_book && b->moves == 0) {
  57                 assert(color == S_BLACK);
  58                 tree_load(u->t, b);
  59         }
  60 }
  61
  62 static void
  63 reset_state(struct uct *u)
  64 {
  65         assert(u->t);
  66         tree_done(u->t); u->t = NULL;
  67 }
  68
  69 static void
  70 prepare_move(struct engine *e, struct board *b, enum stone color)
  71 {
  72         struct uct *u = e->data;
  73
  74         if (u->t) {
  75                 /* Verify that we have sane state. */
  76                 assert(b->es == u);
  77                 assert(u->t && b->moves);
  78                 if (color != stone_other(u->t->root_color)) {
  79                         fprintf(stderr, "Fatal: Non-alternating play detected %d %d\n",
  80                                 color, u->t->root_color);
  81                         exit(1);
  82                 }
  83
  84         } else {
  85                 /* We need fresh state. */
  86                 b->es = u;
  87                 setup_state(u, b, color);
  88         }
  89
  90         if (u->dynkomi && u->dynkomi > b->moves && (color & u->dynkomi_mask))
  91                 u->t->extra_komi = uct_get_extra_komi(u, b);
  92
  93         u->ownermap.playouts = 0;
  94         memset(u->ownermap.map, 0, board_size2(b) * sizeof(u->ownermap.map[0]));
  95 }
  96
  97 static void
  98 dead_group_list(struct uct *u, struct board *b, struct move_queue *mq)
  99 {
 100         struct group_judgement gj;
 101         gj.thres = GJ_THRES;
 102         gj.gs = alloca(board_size2(b) * sizeof(gj.gs[0]));
 103         board_ownermap_judge_group(b, &u->ownermap, &gj);
 104         groups_of_status(b, &gj, GS_DEAD, mq);
 105 }
 106
 107 bool
 108 uct_pass_is_safe(struct uct *u, struct board *b, enum stone color, bool pass_all_alive)
 109 {
 110         if (u->ownermap.playouts < GJ_MINGAMES)
 111                 return false;
 112
 113         struct move_queue mq = { .moves = 0 };
 114         if (!pass_all_alive)
 115                 dead_group_list(u, b, &mq);
 116         return pass_is_safe(b, color, &mq);
 117 }
 118
 119
 120 static void
 121 uct_printhook_ownermap(struct board *board, coord_t c, FILE *f)
 122 {
 123         struct uct *u = board->es;
 124         assert(u);
 125         const char chr[] = ":XO,"; // dame, black, white, unclear
 126         const char chm[] = ":xo,";
 127         char ch = chr[board_ownermap_judge_point(&u->ownermap, c, GJ_THRES)];
 128         if (ch == ',') { // less precise estimate then?
 129                 ch = chm[board_ownermap_judge_point(&u->ownermap, c, 0.67)];
 130         }
 131         fprintf(f, "%c ", ch);
 132 }
 133
 134 static char *
 135 uct_notify_play(struct engine *e, struct board *b, struct move *m)
 136 {
 137         struct uct *u = e->data;
 138         if (!u->t) {
 139                 /* No state, create one - this is probably game beginning
 140                  * and we need to load the opening book right now. */
 141                 prepare_move(e, b, m->color);
 142                 assert(u->t);
 143         }
 144
 145         /* Stop pondering. */
 146         /* XXX: If we are about to receive multiple 'play' commands,
 147          * e.g. in a rengo, we will not ponder during the rest of them. */
 148         uct_pondering_stop(u);
 149
 150         if (is_resign(m->coord)) {
 151                 /* Reset state. */
 152                 reset_state(u);
 153                 return NULL;
 154         }
 155
 156         /* Promote node of the appropriate move to the tree root. */
 157         assert(u->t->root);
 158         if (!tree_promote_at(u->t, b, m->coord)) {
 159                 if (UDEBUGL(0))
 160                         fprintf(stderr, "Warning: Cannot promote move node! Several play commands in row?\n");
 161                 reset_state(u);
 162                 return NULL;
 163         }
 164
 165         return NULL;
 166 }
 167
 168 static char *
 169 uct_chat(struct engine *e, struct board *b, char *cmd)
 170 {
 171         struct uct *u = e->data;
 172         static char reply[1024];
 173
 174         cmd += strspn(cmd, " \n\t");
 175         if (!strncasecmp(cmd, "winrate", 7)) {
 176                 if (!u->t)
 177                         return "no game context (yet?)";
 178                 enum stone color = u->t->root_color;
 179                 struct tree_node *n = u->t->root;
 180                 snprintf(reply, 1024, "In %d*%d playouts, %s %s can win with %.2f%% probability",
 181                          n->u.playouts, u->threads, stone2str(color), coord2sstr(n->coord, b),
 182                          tree_node_get_value(u->t, -1, n->u.value) * 100);
 183                 if (abs(u->t->extra_komi) >= 0.5) {
 184                         sprintf(reply + strlen(reply), ", while self-imposing extra komi %.1f",
 185                                 u->t->extra_komi);
 186                 }
 187                 strcat(reply, ".");
 188                 return reply;
 189         }
 190         return NULL;
 191 }
 192
 193 static void
 194 uct_dead_group_list(struct engine *e, struct board *b, struct move_queue *mq)
 195 {
 196         struct uct *u = e->data;
 197
 198         /* This means the game is probably over, no use pondering on. */
 199         uct_pondering_stop(u);
 200
 201         if (u->pass_all_alive)
 202                 return; // no dead groups
 203
 204         bool mock_state = false;
 205
 206         if (!u->t) {
 207                 /* No state, but we cannot just back out - we might
 208                  * have passed earlier, only assuming some stones are
 209                  * dead, and then re-connected, only to lose counting
 210                  * when all stones are assumed alive. */
 211                 /* Mock up some state and seed the ownermap by few
 212                  * simulations. */
 213                 prepare_move(e, b, S_BLACK); assert(u->t);
 214                 for (int i = 0; i < GJ_MINGAMES; i++)
 215                         uct_playout(u, b, S_BLACK, u->t);
 216                 mock_state = true;
 217         }
 218
 219         dead_group_list(u, b, mq);
 220
 221         if (mock_state) {
 222                 /* Clean up the mock state in case we will receive
 223                  * a genmove; we could get a non-alternating-move
 224                  * error from prepare_move() in that case otherwise. */
 225                 reset_state(u);
 226         }
 227 }
 228
 229 static void
 230 playout_policy_done(struct playout_policy *p)
 231 {
 232         if (p->done) p->done(p);
 233         if (p->data) free(p->data);
 234         free(p);
 235 }
 236
 237 static void
 238 uct_done(struct engine *e)
 239 {
 240         /* This is called on engine reset, especially when clear_board
 241          * is received and new game should begin. */
 242         struct uct *u = e->data;
 243         uct_pondering_stop(u);
 244         if (u->t) reset_state(u);
 245         free(u->ownermap.map);
 246
 247         free(u->policy);
 248         free(u->random_policy);
 249         playout_policy_done(u->playout);
 250         uct_prior_done(u->prior);
 251 }
 252
 253
/* Pachi threading structure (if uct_playouts_threaded() is used):
 255  *
 256  * main thread
 257  *   |         main(), GTP communication, ...
 258  *   |         starts and stops the search managed by thread_manager
 259  *   |
 260  * thread_manager
 261  *   |         spawns and collects worker threads
 262  *   |
 263  * worker0
 264  * worker1
 265  * ...
 266  * workerK
 267  *             uct_playouts() loop, doing descend-playout until uct_halt
 268  *
 269  * Another way to look at it is by functions (lines denote thread boundaries):
 270  *
 271  * | uct_genmove()
 272  * | uct_playouts_threaded()
 273  * | -----------------------
 274  * | spawn_thread_manager()
 275  * | -----------------------
 276  * | spawn_worker()
 277  * V uct_playouts() */
 278
/* Set in thread manager in case the workers should stop: it is cleared
 * when the manager starts and set to 1 once a stop request arrives. */
volatile sig_atomic_t uct_halt = 0;
/* ID of the running worker thread. Thread-local; spawn_worker() sets it
 * to the worker's tid, and it stays -1 in threads that never do. */
__thread int thread_id = -1;
/* ID of the thread manager. */
static pthread_t thread_manager;
/* Set by uct_search_start(), cleared by uct_search_stop(). */
static bool thread_manager_running;

/* Mutex paired with finish_cond; finish_thread is written under it. */
static pthread_mutex_t finish_mutex = PTHREAD_MUTEX_INITIALIZER;
/* Signalled by a worker that has finished, and by uct_search_stop(). */
static pthread_cond_t finish_cond = PTHREAD_COND_INITIALIZER;
/* tid of the worker that has just finished, or -1 for a stop request. */
static volatile int finish_thread;
/* Locked by a finishing worker and unlocked by the thread manager once
 * that worker has been joined. */
static pthread_mutex_t finish_serializer = PTHREAD_MUTEX_INITIALIZER;
 291
/* Argument/result block handed to the thread manager and to each worker.
 * The thread manager uses only some of the fields. */
struct spawn_ctx {
        /* Worker index; copied into thread_id by spawn_worker(). */
        int tid;
        /* Engine state shared by all threads. */
        struct uct *u;
        /* Board the search starts from. */
        struct board *b;
        /* Color passed on to uct_playouts(). */
        enum stone color;
        /* Tree to search; the thread manager may repoint its own copy. */
        struct tree *t;
        /* Seed for the thread's fast_srandom() call. */
        unsigned long seed;
        /* Result: number of games played. */
        int games;
};
 301
 302 static void *
 303 spawn_worker(void *ctx_)
 304 {
 305         struct spawn_ctx *ctx = ctx_;
 306         /* Setup */
 307         fast_srandom(ctx->seed);
 308         thread_id = ctx->tid;
 309         /* Run */
 310         ctx->games = uct_playouts(ctx->u, ctx->b, ctx->color, ctx->t);
 311         /* Finish */
 312         pthread_mutex_lock(&finish_serializer);
 313         pthread_mutex_lock(&finish_mutex);
 314         finish_thread = ctx->tid;
 315         pthread_cond_signal(&finish_cond);
 316         pthread_mutex_unlock(&finish_mutex);
 317         return ctx;
 318 }
 319
 320 /* Thread manager, controlling worker threads. It must be called with
 321  * finish_mutex lock held, but it will unlock it itself before exiting;
 322  * this is necessary to be completely deadlock-free. */
 323 /* The finish_cond can be signalled for it to stop; in that case,
 324  * the caller should set finish_thread = -1. */
 325 /* After it is started, it will update mctx->t to point at some tree
 326  * used for the actual search (matters only for TM_ROOT), on return
 327  * it will set mctx->games to the number of performed simulations. */
 328 static void *
 329 spawn_thread_manager(void *ctx_)
 330 {
 331         /* In thread_manager, we use only some of the ctx fields. */
 332         struct spawn_ctx *mctx = ctx_;
 333         struct uct *u = mctx->u;
 334         struct tree *t = mctx->t;
 335         bool shared_tree = u->parallel_tree;
 336         fast_srandom(mctx->seed);
 337
 338         int played_games = 0;
 339         pthread_t threads[u->threads];
 340         int joined = 0;
 341
 342         uct_halt = 0;
 343
 344         /* Spawn threads... */
 345         for (int ti = 0; ti < u->threads; ti++) {
 346                 struct spawn_ctx *ctx = malloc(sizeof(*ctx));
 347                 ctx->u = u; ctx->b = mctx->b; ctx->color = mctx->color;
 348                 mctx->t = ctx->t = shared_tree ? t : tree_copy(t);
 349                 ctx->tid = ti; ctx->seed = fast_random(65536) + ti;
 350                 pthread_create(&threads[ti], NULL, spawn_worker, ctx);
 351                 if (UDEBUGL(2))
 352                         fprintf(stderr, "Spawned worker %d\n", ti);
 353         }
 354
 355         /* ...and collect them back: */
 356         while (joined < u->threads) {
 357                 /* Wait for some thread to finish... */
 358                 pthread_cond_wait(&finish_cond, &finish_mutex);
 359                 if (finish_thread < 0) {
 360                         /* Stop-by-caller. Tell the workers to wrap up. */
 361                         uct_halt = 1;
 362                         continue;
 363                 }
 364                 /* ...and gather its remnants. */
 365                 struct spawn_ctx *ctx;
 366                 pthread_join(threads[finish_thread], (void **) &ctx);
 367                 played_games += ctx->games;
 368                 joined++;
 369                 if (!shared_tree) {
 370                         if (ctx->t == mctx->t) mctx->t = t;
 371                         tree_merge(t, ctx->t);
 372                         tree_done(ctx->t);
 373                 }
 374                 free(ctx);
 375                 if (UDEBUGL(2))
 376                         fprintf(stderr, "Joined worker %d\n", finish_thread);
 377                 pthread_mutex_unlock(&finish_serializer);
 378         }
 379
 380         pthread_mutex_unlock(&finish_mutex);
 381
 382         if (!shared_tree)
 383                 tree_normalize(mctx->t, u->threads);
 384
 385         mctx->games = played_games;
 386         return mctx;
 387 }
 388
 389 static struct spawn_ctx *
 390 uct_search_start(struct uct *u, struct board *b, enum stone color, struct tree *t)
 391 {
 392         assert(u->threads > 0);
 393         assert(!thread_manager_running);
 394
 395         struct spawn_ctx ctx = { .u = u, .b = b, .color = color, .t = t, .seed = fast_random(65536) };
 396         static struct spawn_ctx mctx; mctx = ctx;
 397         pthread_mutex_lock(&finish_mutex);
 398         pthread_create(&thread_manager, NULL, spawn_thread_manager, &mctx);
 399         thread_manager_running = true;
 400         return &mctx;
 401 }
 402
 403 static struct spawn_ctx *
 404 uct_search_stop(void)
 405 {
 406         assert(thread_manager_running);
 407
 408         /* Signal thread manager to stop the workers. */
 409         pthread_mutex_lock(&finish_mutex);
 410         finish_thread = -1;
 411         pthread_cond_signal(&finish_cond);
 412         pthread_mutex_unlock(&finish_mutex);
 413
 414         /* Collect the thread manager. */
 415         struct spawn_ctx *pctx;
 416         thread_manager_running = false;
 417         pthread_join(thread_manager, (void **) &pctx);
 418         return pctx;
 419 }
 420
 421
 422 /* Run time-limited MCTS search on foreground. */
 423 static int
 424 uct_playouts_threaded(struct uct *u, struct board *b, enum stone color, struct tree *t, int games)
 425 {
 426         /* Required games limit as to be seen in the tree root u.playouts. */
 427         int ngames = games * (u->thread_model == TM_ROOT ? 1 : u->threads);
 428         /* Number of already played games. */
 429         int pgames = t->root->u.playouts;
 430
 431         struct spawn_ctx *ctx = uct_search_start(u, b, color, t);
 432
 433         /* The search tree is ctx->t. This is normally == t, but in case of
 434          * TM_ROOT, it is one of the trees belonging to the independent
 435          * workers. It is important to reference ctx->t directly since the
 436          * thread manager will swap the tree pointer asynchronously. */
 437         /* XXX: This means TM_ROOT support is suboptimal since single stalled
 438          * thread can stall the others in case of limiting the search by game
 439          * count. However, TM_ROOT just does not deserve any more extra code
 440          * right now. */
 441
 442         /* Now, just periodically poll the search tree. */
 443         struct timespec busywait_interval = TREE_BUSYWAIT_INTERVAL;
 444         while (1) {
 445                 nanosleep(&busywait_interval, NULL);
 446
 447                 /* Did we play enough games? */
 448                 if (ctx->t->root->u.playouts - pgames > ngames)
 449                         break;
 450                 /* Won situation? */
 451                 struct tree_node *best = u->policy->choose(u->policy, ctx->t->root, b, color);
 452                 if (best && ((best->u.playouts >= 2000 && tree_node_get_value(ctx->t, 1, best->u.value) >= u->loss_threshold)
 453                              || (best->u.playouts >= 500 && tree_node_get_value(ctx->t, 1, best->u.value) >= 0.95)))
 454                         break;
 455         }
 456
 457         ctx = uct_search_stop();
 458         return ctx->games;
 459 }
 460
 461
 462 /* Start pondering background with @color to play. */
 463 static void
 464 uct_pondering_start(struct uct *u, struct board *b0, struct tree *t, enum stone color)
 465 {
 466         if (UDEBUGL(1))
 467                 fprintf(stderr, "Starting to ponder with color %s\n", stone2str(stone_other(color)));
 468
 469         /* We need a local board copy to ponder upon. */
 470         struct board *b = malloc(sizeof(*b)); board_copy(b, b0);
 471
 472         /* *b0 did not have the genmove'd move played yet. */
 473         struct move m = { t->root->coord, t->root_color };
 474         int res = board_play(b, &m);
 475         assert(res >= 0);
 476
 477         /* Start MCTS manager thread "headless". */
 478         uct_search_start(u, b, color, t);
 479 }
 480
 481 /* uct_search_stop() frontend for the pondering (non-genmove) mode. */
 482 static void
 483 uct_pondering_stop(struct uct *u)
 484 {
 485         if (!thread_manager_running)
 486                 return;
 487
 488         /* Stop the thread manager. */
 489         struct spawn_ctx *ctx = uct_search_stop();
 490         if (UDEBUGL(1))
 491                 fprintf(stderr, "Pondering yielded %d games\n", ctx->games);
 492         free(ctx->b);
 493 }
 494
 495
 496 static coord_t *
 497 uct_genmove(struct engine *e, struct board *b, enum stone color, bool pass_all_alive)
 498 {
 499         struct uct *u = e->data;
 500
 501         if (b->superko_violation) {
 502                 fprintf(stderr, "!!! WARNING: SUPERKO VIOLATION OCCURED BEFORE THIS MOVE\n");
 503                 fprintf(stderr, "Maybe you play with situational instead of positional superko?\n");
 504                 fprintf(stderr, "I'm going to ignore the violation, but note that I may miss\n");
 505                 fprintf(stderr, "some moves valid under this ruleset because of this.\n");
 506                 b->superko_violation = false;
 507         }
 508
 509         /* Seed the tree. */
 510         uct_pondering_stop(u);
 511         prepare_move(e, b, color);
 512         assert(u->t);
 513
 514         /* Determine number of simulations. */
 515         int games = u->games;
 516         if (u->t->root->children) {
 517                 int delta = u->t->root->u.playouts * 2 / 3;
 518                 if (u->parallel_tree) delta /= u->threads;
 519                 games -= delta;
 520         }
 521         /* else this is highly read-out but dead-end branch of opening book;
 522          * we need to start from scratch; XXX: Maybe actually base the readout
 523          * count based on number of playouts of best node? */
 524         if (games < u->games && UDEBUGL(2))
 525                 fprintf(stderr, "<pre-simulated %d games skipped>\n", u->games - games);
 526
 527         /* Perform the Monte Carlo Tree Search! */
 528         int played_games = uct_playouts_threaded(u, b, color, u->t, games);
 529
 530         if (UDEBUGL(2))
 531                 tree_dump(u->t, u->dumpthres);
 532
 533         /* Choose the best move from the tree. */
 534         struct tree_node *best = u->policy->choose(u->policy, u->t->root, b, color);
 535         if (!best) {
 536                 reset_state(u);
 537                 return coord_copy(pass);
 538         }
 539         if (UDEBUGL(0)) {
 540                 uct_progress_status(u, u->t, color, played_games);
 541         }
 542         if (UDEBUGL(1))
 543                 fprintf(stderr, "*** WINNER is %s (%d,%d) with score %1.4f (%d/%d:%d games)\n",
 544                         coord2sstr(best->coord, b), coord_x(best->coord, b), coord_y(best->coord, b),
 545                         tree_node_get_value(u->t, 1, best->u.value),
 546                         best->u.playouts, u->t->root->u.playouts, played_games);
 547         if (tree_node_get_value(u->t, 1, best->u.value) < u->resign_ratio && !is_pass(best->coord)) {
 548                 reset_state(u);
 549                 return coord_copy(resign);
 550         }
 551
 552         /* If the opponent just passed and we win counting, always
 553          * pass as well. */
 554         if (b->moves > 1 && is_pass(b->last_move.coord)) {
 555                 /* Make sure enough playouts are simulated. */
 556                 while (u->ownermap.playouts < GJ_MINGAMES)
 557                         uct_playout(u, b, color, u->t);
 558                 if (uct_pass_is_safe(u, b, color, u->pass_all_alive || pass_all_alive)) {
 559                         if (UDEBUGL(0))
 560                                 fprintf(stderr, "<Will rather pass, looks safe enough.>\n");
 561                         best->coord = pass;
 562                 }
 563         }
 564
 565         tree_promote_node(u->t, best);
 566         /* After a pass, pondering is harmful for two reasons:
 567          * (i) We might keep pondering even when the game is over.
 568          * Of course this is the case for opponent resign as well.
 569          * (ii) More importantly, the ownermap will get skewed since
 570          * the UCT will start cutting off any playouts. */
 571         if (u->pondering && !is_pass(best->coord)) {
 572                 uct_pondering_start(u, b, u->t, stone_other(color));
 573         }
 574         return coord_copy(best->coord);
 575 }
 576
 577
 578 bool
 579 uct_genbook(struct engine *e, struct board *b, enum stone color)
 580 {
 581         struct uct *u = e->data;
 582         if (!u->t) prepare_move(e, b, color);
 583         assert(u->t);
 584
 585         uct_playouts_threaded(u, b, color, u->t, u->games);
 586
 587         tree_save(u->t, b, u->games / 100);
 588
 589         return true;
 590 }
 591
 592 void
 593 uct_dumpbook(struct engine *e, struct board *b, enum stone color)
 594 {
 595         struct tree *t = tree_init(b, color);
 596         tree_load(t, b);
 597         tree_dump(t, 0);
 598         tree_done(t);
 599 }
 600
 601
 602 struct uct *
 603 uct_state_init(char *arg, struct board *b)
 604 {
 605         struct uct *u = calloc(1, sizeof(struct uct));
 606
 607         u->debug_level = 1;
 608         u->games = MC_GAMES;
 609         u->gamelen = MC_GAMELEN;
 610         u->mercymin = 0;
 611         u->expand_p = 2;
 612         u->dumpthres = 1000;
 613         u->playout_amaf = true;
 614         u->playout_amaf_nakade = false;
 615         u->amaf_prior = false;
 616         u->max_tree_size = 3072ULL * 1048576;
 617
 618         if (board_size(b) - 2 >= 19)
 619                 u->dynkomi = 200;
 620         u->dynkomi_mask = S_BLACK;
 621
 622         u->threads = 1;
 623         u->thread_model = TM_TREEVL;
 624         u->parallel_tree = true;
 625         u->virtual_loss = true;
 626
 627         u->val_scale = 0.02; u->val_points = 20;
 628
 629         if (arg) {
 630                 char *optspec, *next = arg;
 631                 while (*next) {
 632                         optspec = next;
 633                         next += strcspn(next, ",");
 634                         if (*next) { *next++ = 0; } else { *next = 0; }
 635
 636                         char *optname = optspec;
 637                         char *optval = strchr(optspec, '=');
 638                         if (optval) *optval++ = 0;
 639
 640                         if (!strcasecmp(optname, "debug")) {
 641                                 if (optval)
 642                                         u->debug_level = atoi(optval);
 643                                 else
 644                                         u->debug_level++;
 645                         } else if (!strcasecmp(optname, "games") && optval) {
 646                                 u->games = atoi(optval);
 647                         } else if (!strcasecmp(optname, "mercy") && optval) {
 648                                 /* Minimal difference of black/white captures
 649                                  * to stop playout - "Mercy Rule". Speeds up
 650                                  * hopeless playouts at the expense of some
 651                                  * accuracy. */
 652                                 u->mercymin = atoi(optval);
 653                         } else if (!strcasecmp(optname, "gamelen") && optval) {
 654                                 u->gamelen = atoi(optval);
 655                         } else if (!strcasecmp(optname, "expand_p") && optval) {
 656                                 u->expand_p = atoi(optval);
 657                         } else if (!strcasecmp(optname, "dumpthres") && optval) {
 658                                 u->dumpthres = atoi(optval);
 659                         } else if (!strcasecmp(optname, "playout_amaf")) {
 660                                 /* Whether to include random playout moves in
 661                                  * AMAF as well. (Otherwise, only tree moves
 662                                  * are included in AMAF. Of course makes sense
 663                                  * only in connection with an AMAF policy.) */
 664                                 /* with-without: 55.5% (+-4.1) */
 665                                 if (optval && *optval == '0')
 666                                         u->playout_amaf = false;
 667                                 else
 668                                         u->playout_amaf = true;
 669                         } else if (!strcasecmp(optname, "playout_amaf_nakade")) {
 670                                 /* Whether to include nakade moves from playouts
 671                                  * in the AMAF statistics; this tends to nullify
 672                                  * the playout_amaf effect by adding too much
 673                                  * noise. */
 674                                 if (optval && *optval == '0')
 675                                         u->playout_amaf_nakade = false;
 676                                 else
 677                                         u->playout_amaf_nakade = true;
 678                         } else if (!strcasecmp(optname, "playout_amaf_cutoff") && optval) {
 679                                 /* Keep only first N% of playout stage AMAF
 680                                  * information. */
 681                                 u->playout_amaf_cutoff = atoi(optval);
 682                         } else if ((!strcasecmp(optname, "policy") || !strcasecmp(optname, "random_policy")) && optval) {
 683                                 char *policyarg = strchr(optval, ':');
 684                                 struct uct_policy **p = !strcasecmp(optname, "policy") ? &u->policy : &u->random_policy;
 685                                 if (policyarg)
 686                                         *policyarg++ = 0;
 687                                 if (!strcasecmp(optval, "ucb1")) {
 688                                         *p = policy_ucb1_init(u, policyarg);
 689                                 } else if (!strcasecmp(optval, "ucb1amaf")) {
 690                                         *p = policy_ucb1amaf_init(u, policyarg);
 691                                 } else {
 692                                         fprintf(stderr, "UCT: Invalid tree policy %s\n", optval);
 693                                         exit(1);
 694                                 }
 695                         } else if (!strcasecmp(optname, "playout") && optval) {
 696                                 char *playoutarg = strchr(optval, ':');
 697                                 if (playoutarg)
 698                                         *playoutarg++ = 0;
 699                                 if (!strcasecmp(optval, "moggy")) {
 700                                         u->playout = playout_moggy_init(playoutarg);
 701                                 } else if (!strcasecmp(optval, "light")) {
 702                                         u->playout = playout_light_init(playoutarg);
 703                                 } else if (!strcasecmp(optval, "elo")) {
 704                                         u->playout = playout_elo_init(playoutarg);
 705                                 } else {
 706                                         fprintf(stderr, "UCT: Invalid playout policy %s\n", optval);
 707                                         exit(1);
 708                                 }
 709                         } else if (!strcasecmp(optname, "prior") && optval) {
 710                                 u->prior = uct_prior_init(optval, b);
 711                         } else if (!strcasecmp(optname, "amaf_prior") && optval) {
 712                                 u->amaf_prior = atoi(optval);
 713                         } else if (!strcasecmp(optname, "threads") && optval) {
 714                                 /* By default, Pachi will run with only single
 715                                  * tree search thread! */
 716                                 u->threads = atoi(optval);
 717                         } else if (!strcasecmp(optname, "thread_model") && optval) {
 718                                 if (!strcasecmp(optval, "root")) {
 719                                         /* Root parallelization - each thread
 720                                          * does independent search, trees are
 721                                          * merged at the end. */
 722                                         u->thread_model = TM_ROOT;
 723                                         u->parallel_tree = false;
 724                                         u->virtual_loss = false;
 725                                 } else if (!strcasecmp(optval, "tree")) {
 726                                         /* Tree parallelization - all threads
 727                                          * grind on the same tree. */
 728                                         u->thread_model = TM_TREE;
 729                                         u->parallel_tree = true;
 730                                         u->virtual_loss = false;
 731                                 } else if (!strcasecmp(optval, "treevl")) {
 732                                         /* Tree parallelization, but also
 733                                          * with virtual losses - this discou-
 734                                          * rages most threads choosing the
 735                                          * same tree branches to read. */
 736                                         u->thread_model = TM_TREEVL;
 737                                         u->parallel_tree = true;
 738                                         u->virtual_loss = true;
 739                                 } else {
 740                                         fprintf(stderr, "UCT: Invalid thread model %s\n", optval);
 741                                         exit(1);
 742                                 }
 743                         } else if (!strcasecmp(optname, "pondering")) {
 744                                 /* Keep searching even during opponent's turn. */
 745                                 u->pondering = !optval || atoi(optval);
 746                         } else if (!strcasecmp(optname, "force_seed") && optval) {
 747                                 u->force_seed = atoi(optval);
 748                         } else if (!strcasecmp(optname, "no_book")) {
 749                                 u->no_book = true;
 750                         } else if (!strcasecmp(optname, "dynkomi")) {
 751                                 /* Dynamic komi in handicap game; linearly
 752                                  * decreases to basic settings until move
 753                                  * #optval. */
 754                                 u->dynkomi = optval ? atoi(optval) : 150;
 755                         } else if (!strcasecmp(optname, "dynkomi_mask") && optval) {
 756                                 /* Bitmask of colors the player must be
 757                                  * for dynkomi to be applied; you may want
 758                                  * to use dynkomi_mask=3 to allow dynkomi
 759                                  * even in games where Pachi is white. */
 760                                 u->dynkomi_mask = atoi(optval);
 761                         } else if (!strcasecmp(optname, "val_scale") && optval) {
 762                                 /* How much of the game result value should be
 763                                  * influenced by win size. Zero means it isn't. */
 764                                 u->val_scale = atof(optval);
 765                         } else if (!strcasecmp(optname, "val_points") && optval) {
 766                                 /* Maximum size of win to be scaled into game
 767                                  * result value. Zero means boardsize^2. */
 768                                 u->val_points = atoi(optval) * 2; // result values are doubled
 769                         } else if (!strcasecmp(optname, "val_extra")) {
 770                                 /* If false, the score coefficient will be simply
 771                                  * added to the value, instead of scaling the result
 772                                  * coefficient because of it. */
 773                                 u->val_extra = !optval || atoi(optval);
 774                         } else if (!strcasecmp(optname, "root_heuristic") && optval) {
 775                                 /* Whether to bias exploration by root node values
 776                                  * (must be supported by the used policy).
 777                                  * 0: Don't.
 778                                  * 1: Do, value = result.
 779                                  * Try to temper the result:
 780                                  * 2: Do, value = 0.5+(result-expected)/2.
 781                                  * 3: Do, value = 0.5+bzz((result-expected)^2). */
 782                                 u->root_heuristic = atoi(optval);
 783                         } else if (!strcasecmp(optname, "pass_all_alive")) {
 784                                 /* Whether to consider all stones alive at the game
 785                                  * end instead of marking dead groups. */
 786                                 u->pass_all_alive = !optval || atoi(optval);
 787                         } else if (!strcasecmp(optname, "random_policy_chance") && optval) {
 788                                 /* If specified (N), with probability 1/N, random_policy policy
 789                                  * descend is used instead of main policy descend; useful
 790                                  * if specified policy (e.g. UCB1AMAF) can make unduly biased
 791                                  * choices sometimes, you can fall back to e.g.
 792                                  * random_policy=UCB1. */
 793                                 u->random_policy_chance = atoi(optval);
 794                         } else if (!strcasecmp(optname, "max_tree_size") && optval) {
 795                                 /* Maximum amount of memory [MiB] consumed by the move tree.
 796                                  * Default is 3072 (3 GiB). Note that if you use TM_ROOT,
 797                                  * this limits size of only one of the trees, not all of them
 798                                  * together. */
 799                                 u->max_tree_size = atol(optval) * 1048576;
 800                         } else if (!strcasecmp(optname, "banner") && optval) {
 801                                 /* Additional banner string. This must come as the
 802                                  * last engine parameter. */
 803                                 if (*next) *--next = ',';
 804                                 u->banner = strdup(optval);
 805                                 break;
 806                         } else {
 807                                 fprintf(stderr, "uct: Invalid engine argument %s or missing value\n", optname);
 808                                 exit(1);
 809                         }
 810                 }
 811         }
 812
 813         u->resign_ratio = 0.2; /* Resign when most games are lost. */
 814         u->loss_threshold = 0.85; /* Stop reading if after at least 5000 playouts this is best value. */
 815         if (!u->policy)
 816                 u->policy = policy_ucb1amaf_init(u, NULL);
 817
 818         if (!!u->random_policy_chance ^ !!u->random_policy) {
 819                 fprintf(stderr, "uct: Only one of random_policy and random_policy_chance is set\n");
 820                 exit(1);
 821         }
 822
 823         if (!u->prior)
 824                 u->prior = uct_prior_init(NULL, b);
 825
 826         if (!u->playout)
 827                 u->playout = playout_moggy_init(NULL);
 828         u->playout->debug_level = u->debug_level;
 829
 830         u->ownermap.map = malloc(board_size2(b) * sizeof(u->ownermap.map[0]));
 831
 832         /* Some things remain uninitialized for now - the opening book
 833          * is not loaded and the tree not set up. */
 834         /* This will be initialized in setup_state() at the first move
 835          * received/requested. This is because right now we are not aware
 836          * about any komi or handicap setup and such. */
 837
 838         return u;
 839 }
 840
 841 struct engine *
 842 engine_uct_init(char *arg, struct board *b)
 843 {
 844         struct uct *u = uct_state_init(arg, b);
 845         struct engine *e = calloc(1, sizeof(struct engine));
 846         e->name = "UCT Engine";
 847         e->printhook = uct_printhook_ownermap;
 848         e->notify_play = uct_notify_play;
 849         e->chat = uct_chat;
 850         e->genmove = uct_genmove;
 851         e->dead_group_list = uct_dead_group_list;
 852         e->done = uct_done;
 853         e->data = u;
 854
 855         const char banner[] = "I'm playing UCT. When I'm losing, I will resign, "
 856                 "if I think I win, I play until you pass. "
 857                 "Anyone can send me 'winrate' in private chat to get my assessment of the position.";
 858         if (!u->banner) u->banner = "";
 859         e->comment = malloc(sizeof(banner) + strlen(u->banner) + 1);
 860         sprintf(e->comment, "%s %s", banner, u->banner);
 861
 862         return e;
 863 }