UCT Threading: Fix stop-by-caller - spontaneous-stop deadlock
[pachi/json.git] / uct / uct.c
blob5d8a729ee9d7e743c26a85af8f2768179b7cb521
1 #include <assert.h>
2 #include <pthread.h>
3 #include <signal.h>
4 #include <stdio.h>
5 #include <stdlib.h>
6 #include <string.h>
8 #define DEBUG
10 #include "debug.h"
11 #include "board.h"
12 #include "gtp.h"
13 #include "move.h"
14 #include "mq.h"
15 #include "playout.h"
16 #include "playout/elo.h"
17 #include "playout/moggy.h"
18 #include "playout/light.h"
19 #include "random.h"
20 #include "tactics.h"
21 #include "uct/internal.h"
22 #include "uct/prior.h"
23 #include "uct/tree.h"
24 #include "uct/uct.h"
25 #include "uct/walk.h"
/* Tree policy constructors; defined elsewhere. */
struct uct_policy *policy_ucb1_init(struct uct *u, char *arg);
struct uct_policy *policy_ucb1amaf_init(struct uct *u, char *arg);
/* Defined further below, but already needed by the engine callbacks. */
static void uct_pondering_finish(struct uct *u);


/* Default values for the "games" and "gamelen" engine parameters. */
#define MC_GAMES	80000
#define MC_GAMELEN	MAX_GAMELEN

/* Proportion of the ownermap counts that must belong to a single color
 * before a point is considered surely owned by it. */
#define GJ_THRES	0.8
/* Minimal number of playouts in the ownermap before groups are judged. */
#define GJ_MINGAMES	500
42 static void
43 setup_state(struct uct *u, struct board *b, enum stone color)
45 u->t = tree_init(b, color);
46 if (u->force_seed)
47 fast_srandom(u->force_seed);
48 if (UDEBUGL(0))
49 fprintf(stderr, "Fresh board with random seed %lu\n", fast_getseed());
50 //board_print(b, stderr);
51 if (!u->no_book && b->moves == 0) {
52 assert(color == S_BLACK);
53 tree_load(u->t, b);
57 static void
58 reset_state(struct uct *u)
60 assert(u->t);
61 tree_done(u->t); u->t = NULL;
64 static void
65 prepare_move(struct engine *e, struct board *b, enum stone color)
67 struct uct *u = e->data;
69 if (u->t) {
70 /* Verify that we have sane state. */
71 assert(b->es == u);
72 assert(u->t && b->moves);
73 if (color != stone_other(u->t->root_color)) {
74 fprintf(stderr, "Fatal: Non-alternating play detected %d %d\n",
75 color, u->t->root_color);
76 exit(1);
79 } else {
80 /* We need fresh state. */
81 b->es = u;
82 setup_state(u, b, color);
85 if (u->dynkomi && u->dynkomi > b->moves && (color & u->dynkomi_mask))
86 u->t->extra_komi = uct_get_extra_komi(u, b);
88 u->ownermap.playouts = 0;
89 memset(u->ownermap.map, 0, board_size2(b) * sizeof(u->ownermap.map[0]));
92 static void
93 dead_group_list(struct uct *u, struct board *b, struct move_queue *mq)
95 struct group_judgement gj;
96 gj.thres = GJ_THRES;
97 gj.gs = alloca(board_size2(b) * sizeof(gj.gs[0]));
98 board_ownermap_judge_group(b, &u->ownermap, &gj);
99 groups_of_status(b, &gj, GS_DEAD, mq);
102 bool
103 uct_pass_is_safe(struct uct *u, struct board *b, enum stone color, bool pass_all_alive)
105 if (u->ownermap.playouts < GJ_MINGAMES)
106 return false;
108 struct move_queue mq = { .moves = 0 };
109 if (!pass_all_alive)
110 dead_group_list(u, b, &mq);
111 return pass_is_safe(b, color, &mq);
115 static void
116 uct_printhook_ownermap(struct board *board, coord_t c, FILE *f)
118 struct uct *u = board->es;
119 assert(u);
120 const char chr[] = ":XO,"; // dame, black, white, unclear
121 const char chm[] = ":xo,";
122 char ch = chr[board_ownermap_judge_point(&u->ownermap, c, GJ_THRES)];
123 if (ch == ',') { // less precise estimate then?
124 ch = chm[board_ownermap_judge_point(&u->ownermap, c, 0.67)];
126 fprintf(f, "%c ", ch);
129 static char *
130 uct_notify_play(struct engine *e, struct board *b, struct move *m)
132 struct uct *u = e->data;
133 if (!u->t) {
134 /* No state, create one - this is probably game beginning
135 * and we need to load the opening book right now. */
136 prepare_move(e, b, m->color);
137 assert(u->t);
140 /* Stop pondering. */
141 /* XXX: If we are about to receive multiple 'play' commands,
142 * e.g. in a rengo, we will not ponder during the rest of them. */
143 uct_pondering_finish(u);
145 if (is_resign(m->coord)) {
146 /* Reset state. */
147 reset_state(u);
148 return NULL;
151 /* Promote node of the appropriate move to the tree root. */
152 assert(u->t->root);
153 if (!tree_promote_at(u->t, b, m->coord)) {
154 if (UDEBUGL(0))
155 fprintf(stderr, "Warning: Cannot promote move node! Several play commands in row?\n");
156 reset_state(u);
157 return NULL;
160 return NULL;
163 static char *
164 uct_chat(struct engine *e, struct board *b, char *cmd)
166 struct uct *u = e->data;
167 static char reply[1024];
169 cmd += strspn(cmd, " \n\t");
170 if (!strncasecmp(cmd, "winrate", 7)) {
171 if (!u->t)
172 return "no game context (yet?)";
173 enum stone color = u->t->root_color;
174 struct tree_node *n = u->t->root;
175 snprintf(reply, 1024, "In %d*%d playouts, %s %s can win with %.2f%% probability",
176 n->u.playouts, u->threads, stone2str(color), coord2sstr(n->coord, b),
177 tree_node_get_value(u->t, -1, n->u.value) * 100);
178 if (abs(u->t->extra_komi) >= 0.5) {
179 sprintf(reply + strlen(reply), ", while self-imposing extra komi %.1f",
180 u->t->extra_komi);
182 strcat(reply, ".");
183 return reply;
185 return NULL;
188 static void
189 uct_dead_group_list(struct engine *e, struct board *b, struct move_queue *mq)
191 struct uct *u = e->data;
193 /* This means the game is probabl yover, no use pondering on. */
194 uct_pondering_finish(u);
196 if (u->pass_all_alive)
197 return; // no dead groups
199 bool mock_state = false;
201 if (!u->t) {
202 /* No state, but we cannot just back out - we might
203 * have passed earlier, only assuming some stones are
204 * dead, and then re-connected, only to lose counting
205 * when all stones are assumed alive. */
206 /* Mock up some state and seed the ownermap by few
207 * simulations. */
208 prepare_move(e, b, S_BLACK); assert(u->t);
209 for (int i = 0; i < GJ_MINGAMES; i++)
210 uct_playout(u, b, S_BLACK, u->t);
211 mock_state = true;
214 dead_group_list(u, b, mq);
216 if (mock_state) {
217 /* Clean up the mock state in case we will receive
218 * a genmove; we could get a non-alternating-move
219 * error from prepare_move() in that case otherwise. */
220 reset_state(u);
224 static void
225 playout_policy_done(struct playout_policy *p)
227 if (p->done) p->done(p);
228 if (p->data) free(p->data);
229 free(p);
232 static void
233 uct_done(struct engine *e)
235 /* This is called on engine reset, especially when clear_board
236 * is received and new game should begin. */
237 struct uct *u = e->data;
238 uct_pondering_finish(u);
239 if (u->t) reset_state(u);
240 free(u->ownermap.map);
242 free(u->policy);
243 free(u->random_policy);
244 playout_policy_done(u->playout);
245 uct_prior_done(u->prior);
/* Pachi threading structure (as set up by uct_pondering_start()):
 *
 * main thread
 *   |         main(), GTP communication, ...
 *   |
 * thread_manager
 *   |         spawns and manages worker threads
 *   |
 * worker0
 * worker1
 * ...
 * workerK
 *             uct_playouts() loop, doing descend-playout N=games times
 */

/* Set in thread manager in case the workers should stop. */
volatile sig_atomic_t uct_halt = 0;
/* ID of the running worker thread. */
__thread int thread_id = -1;
/* ID of the thread manager. */
static pthread_t thread_manager;
/* Whether a thread manager has been started and not yet joined. */
static bool thread_manager_running;

/* finish_mutex guards finish_thread; finish_cond wakes up the thread
 * manager after finish_thread has been set. */
static pthread_mutex_t finish_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t finish_cond = PTHREAD_COND_INITIALIZER;
/* ID of the worker that has just finished, or -1 for a stop request. */
static volatile int finish_thread;
/* Taken by a finishing worker and released by the thread manager once
 * that worker has been joined, so workers report in one at a time. */
static pthread_mutex_t finish_serializer = PTHREAD_MUTEX_INITIALIZER;
277 struct spawn_ctx {
278 int tid;
279 struct uct *u;
280 struct board *b;
281 enum stone color;
282 struct tree *t;
283 unsigned long seed;
284 int games;
287 static void *
288 spawn_worker(void *ctx_)
290 struct spawn_ctx *ctx = ctx_;
291 /* Setup */
292 fast_srandom(ctx->seed);
293 thread_id = ctx->tid;
294 /* Run */
295 ctx->games = uct_playouts(ctx->u, ctx->b, ctx->color, ctx->t, ctx->games);
296 /* Finish */
297 pthread_mutex_lock(&finish_serializer);
298 pthread_mutex_lock(&finish_mutex);
299 finish_thread = ctx->tid;
300 pthread_cond_signal(&finish_cond);
301 pthread_mutex_unlock(&finish_mutex);
302 return ctx;
305 /* Thread manager, controlling worker threads. It must be called with
306 * finish_mutex lock held, and the finish_cond can be signalled for it
307 * to stop; in that case, the caller should set finish_thread = -1. */
308 static void *
309 spawn_thread_manager(void *ctx_)
311 /* In thread_manager, we use only some of the ctx fields. */
312 struct spawn_ctx *mctx = ctx_;
313 struct uct *u = mctx->u;
314 bool shared_tree = u->parallel_tree;
315 fast_srandom(mctx->seed);
317 int played_games = 0;
318 pthread_t threads[u->threads];
319 int joined = 0;
321 pthread_mutex_lock(&finish_mutex);
322 uct_halt = 0;
324 /* Spawn threads... */
325 for (int ti = 0; ti < u->threads; ti++) {
326 struct spawn_ctx *ctx = malloc(sizeof(*ctx));
327 ctx->u = u; ctx->b = mctx->b; ctx->color = mctx->color;
328 ctx->t = shared_tree ? mctx->t : tree_copy(mctx->t);
329 ctx->tid = ti; ctx->games = mctx->games;
330 ctx->seed = fast_random(65536) + ti;
331 pthread_create(&threads[ti], NULL, spawn_worker, ctx);
332 if (UDEBUGL(2))
333 fprintf(stderr, "Spawned worker %d\n", ti);
336 /* ...and collect them back: */
337 while (joined < u->threads) {
338 /* Wait for some thread to finish... */
339 pthread_cond_wait(&finish_cond, &finish_mutex);
340 if (finish_thread < 0) {
341 /* Stop-by-caller. Tell the workers to wrap up. */
342 uct_halt = 1;
343 continue;
345 /* ...and gather its remnants. */
346 struct spawn_ctx *ctx;
347 pthread_join(threads[finish_thread], (void **) &ctx);
348 played_games += ctx->games;
349 joined++;
350 if (!shared_tree) {
351 tree_merge(mctx->t, ctx->t);
352 tree_done(ctx->t);
354 free(ctx);
355 if (UDEBUGL(2))
356 fprintf(stderr, "Joined worker %d\n", finish_thread);
357 /* Do not get stalled by slow threads. */
358 if (joined >= u->threads / 2)
359 uct_halt = 1;
360 pthread_mutex_unlock(&finish_serializer);
363 pthread_mutex_unlock(&finish_mutex);
365 if (!shared_tree)
366 tree_normalize(mctx->t, u->threads);
368 mctx->games = played_games;
369 return mctx;
372 static void
373 uct_pondering_start(struct uct *u, struct board *b0, enum stone color, struct tree *t, int games)
375 assert(u->threads > 0);
376 assert(!thread_manager_running);
378 /* *b0 can change in the meantime. */
379 struct board b; board_copy(&b, b0);
381 struct spawn_ctx ctx = { .u = u, .b = &b, .color = color, .t = t, .games = games, .seed = fast_random(65536) };
382 static struct spawn_ctx mctx; mctx = ctx;
383 pthread_create(&thread_manager, NULL, spawn_thread_manager, &mctx);
384 thread_manager_running = true;
387 static int
388 uct_pondering_stop(void)
390 assert(thread_manager_running);
392 struct spawn_ctx *pctx;
393 thread_manager_running = false;
394 pthread_join(thread_manager, (void **) &pctx);
395 return pctx->games;
398 /* uct_pondering_stop() frontend for the pondering (non-genmove) mode. */
399 static void
400 uct_pondering_finish(struct uct *u)
402 if (!thread_manager_running)
403 return;
405 /* Signal thread manager to stop the workers. */
406 pthread_mutex_lock(&finish_mutex);
407 finish_thread = -1;
408 pthread_cond_signal(&finish_cond);
409 pthread_mutex_unlock(&finish_mutex);
411 /* Collect thread manager. */
412 int games = uct_pondering_stop();
413 if (UDEBUGL(1))
414 fprintf(stderr, "Pondering yielded %d games\n", games);
417 static int
418 uct_playouts_threaded(struct uct *u, struct board *b, enum stone color, struct tree *t, int games)
420 uct_pondering_start(u, b, color, t, games);
421 /* We just wait until the thread manager finishes. */
422 return uct_pondering_stop();
426 static coord_t *
427 uct_genmove(struct engine *e, struct board *b, enum stone color, bool pass_all_alive)
429 struct uct *u = e->data;
431 if (b->superko_violation) {
432 fprintf(stderr, "!!! WARNING: SUPERKO VIOLATION OCCURED BEFORE THIS MOVE\n");
433 fprintf(stderr, "Maybe you play with situational instead of positional superko?\n");
434 fprintf(stderr, "I'm going to ignore the violation, but note that I may miss\n");
435 fprintf(stderr, "some moves valid under this ruleset because of this.\n");
436 b->superko_violation = false;
439 /* Seed the tree. */
440 uct_pondering_finish(u);
441 prepare_move(e, b, color);
442 assert(u->t);
444 /* Determine number of simulations. */
445 int games = u->games;
446 if (u->t->root->children) {
447 int delta = u->t->root->u.playouts * 2 / 3;
448 if (u->parallel_tree) delta /= u->threads;
449 games -= delta;
451 /* else this is highly read-out but dead-end branch of opening book;
452 * we need to start from scratch; XXX: Maybe actually base the readout
453 * count based on number of playouts of best node? */
454 if (games < u->games && UDEBUGL(2))
455 fprintf(stderr, "<pre-simulated %d games skipped>\n", u->games - games);
457 /* Perform the Monte Carlo Tree Search! */
458 int played_games = uct_playouts_threaded(u, b, color, u->t, games);
460 if (UDEBUGL(2))
461 tree_dump(u->t, u->dumpthres);
463 /* Choose the best move from the tree. */
464 struct tree_node *best = u->policy->choose(u->policy, u->t->root, b, color);
465 if (!best) {
466 reset_state(u);
467 return coord_copy(pass);
469 if (UDEBUGL(0)) {
470 uct_progress_status(u, u->t, color, played_games);
472 if (UDEBUGL(1))
473 fprintf(stderr, "*** WINNER is %s (%d,%d) with score %1.4f (%d/%d:%d games)\n",
474 coord2sstr(best->coord, b), coord_x(best->coord, b), coord_y(best->coord, b),
475 tree_node_get_value(u->t, 1, best->u.value),
476 best->u.playouts, u->t->root->u.playouts, played_games);
477 if (tree_node_get_value(u->t, 1, best->u.value) < u->resign_ratio && !is_pass(best->coord)) {
478 reset_state(u);
479 return coord_copy(resign);
482 /* If the opponent just passed and we win counting, always
483 * pass as well. */
484 if (b->moves > 1 && is_pass(b->last_move.coord)) {
485 /* Make sure enough playouts are simulated. */
486 while (u->ownermap.playouts < GJ_MINGAMES)
487 uct_playout(u, b, color, u->t);
488 if (uct_pass_is_safe(u, b, color, u->pass_all_alive || pass_all_alive)) {
489 if (UDEBUGL(0))
490 fprintf(stderr, "<Will rather pass, looks safe enough.>\n");
491 best->coord = pass;
495 tree_promote_node(u->t, best);
496 if (u->pondering) {
497 if (UDEBUGL(1))
498 fprintf(stderr, "Starting to ponder with color %s\n", stone2str(stone_other(color)));
499 uct_pondering_start(u, b, stone_other(color), u->t, 0);
501 return coord_copy(best->coord);
505 bool
506 uct_genbook(struct engine *e, struct board *b, enum stone color)
508 struct uct *u = e->data;
509 if (!u->t) prepare_move(e, b, color);
510 assert(u->t);
512 uct_playouts_threaded(u, b, color, u->t, u->games);
514 tree_save(u->t, b, u->games / 100);
516 return true;
519 void
520 uct_dumpbook(struct engine *e, struct board *b, enum stone color)
522 struct tree *t = tree_init(b, color);
523 tree_load(t, b);
524 tree_dump(t, 0);
525 tree_done(t);
529 struct uct *
530 uct_state_init(char *arg, struct board *b)
532 struct uct *u = calloc(1, sizeof(struct uct));
534 u->debug_level = 1;
535 u->games = MC_GAMES;
536 u->gamelen = MC_GAMELEN;
537 u->mercymin = 0;
538 u->expand_p = 2;
539 u->dumpthres = 1000;
540 u->playout_amaf = true;
541 u->playout_amaf_nakade = false;
542 u->amaf_prior = false;
543 u->max_tree_size = 3072ULL * 1048576;
545 if (board_size(b) - 2 >= 19)
546 u->dynkomi = 200;
547 u->dynkomi_mask = S_BLACK;
549 u->threads = 1;
550 u->thread_model = TM_TREEVL;
551 u->parallel_tree = true;
552 u->virtual_loss = true;
554 u->val_scale = 0.02; u->val_points = 20;
556 if (arg) {
557 char *optspec, *next = arg;
558 while (*next) {
559 optspec = next;
560 next += strcspn(next, ",");
561 if (*next) { *next++ = 0; } else { *next = 0; }
563 char *optname = optspec;
564 char *optval = strchr(optspec, '=');
565 if (optval) *optval++ = 0;
567 if (!strcasecmp(optname, "debug")) {
568 if (optval)
569 u->debug_level = atoi(optval);
570 else
571 u->debug_level++;
572 } else if (!strcasecmp(optname, "games") && optval) {
573 u->games = atoi(optval);
574 } else if (!strcasecmp(optname, "mercy") && optval) {
575 /* Minimal difference of black/white captures
576 * to stop playout - "Mercy Rule". Speeds up
577 * hopeless playouts at the expense of some
578 * accuracy. */
579 u->mercymin = atoi(optval);
580 } else if (!strcasecmp(optname, "gamelen") && optval) {
581 u->gamelen = atoi(optval);
582 } else if (!strcasecmp(optname, "expand_p") && optval) {
583 u->expand_p = atoi(optval);
584 } else if (!strcasecmp(optname, "dumpthres") && optval) {
585 u->dumpthres = atoi(optval);
586 } else if (!strcasecmp(optname, "playout_amaf")) {
587 /* Whether to include random playout moves in
588 * AMAF as well. (Otherwise, only tree moves
589 * are included in AMAF. Of course makes sense
590 * only in connection with an AMAF policy.) */
591 /* with-without: 55.5% (+-4.1) */
592 if (optval && *optval == '0')
593 u->playout_amaf = false;
594 else
595 u->playout_amaf = true;
596 } else if (!strcasecmp(optname, "playout_amaf_nakade")) {
597 /* Whether to include nakade moves from playouts
598 * in the AMAF statistics; this tends to nullify
599 * the playout_amaf effect by adding too much
600 * noise. */
601 if (optval && *optval == '0')
602 u->playout_amaf_nakade = false;
603 else
604 u->playout_amaf_nakade = true;
605 } else if (!strcasecmp(optname, "playout_amaf_cutoff") && optval) {
606 /* Keep only first N% of playout stage AMAF
607 * information. */
608 u->playout_amaf_cutoff = atoi(optval);
609 } else if ((!strcasecmp(optname, "policy") || !strcasecmp(optname, "random_policy")) && optval) {
610 char *policyarg = strchr(optval, ':');
611 struct uct_policy **p = !strcasecmp(optname, "policy") ? &u->policy : &u->random_policy;
612 if (policyarg)
613 *policyarg++ = 0;
614 if (!strcasecmp(optval, "ucb1")) {
615 *p = policy_ucb1_init(u, policyarg);
616 } else if (!strcasecmp(optval, "ucb1amaf")) {
617 *p = policy_ucb1amaf_init(u, policyarg);
618 } else {
619 fprintf(stderr, "UCT: Invalid tree policy %s\n", optval);
620 exit(1);
622 } else if (!strcasecmp(optname, "playout") && optval) {
623 char *playoutarg = strchr(optval, ':');
624 if (playoutarg)
625 *playoutarg++ = 0;
626 if (!strcasecmp(optval, "moggy")) {
627 u->playout = playout_moggy_init(playoutarg);
628 } else if (!strcasecmp(optval, "light")) {
629 u->playout = playout_light_init(playoutarg);
630 } else if (!strcasecmp(optval, "elo")) {
631 u->playout = playout_elo_init(playoutarg);
632 } else {
633 fprintf(stderr, "UCT: Invalid playout policy %s\n", optval);
634 exit(1);
636 } else if (!strcasecmp(optname, "prior") && optval) {
637 u->prior = uct_prior_init(optval, b);
638 } else if (!strcasecmp(optname, "amaf_prior") && optval) {
639 u->amaf_prior = atoi(optval);
640 } else if (!strcasecmp(optname, "threads") && optval) {
641 /* By default, Pachi will run with only single
642 * tree search thread! */
643 u->threads = atoi(optval);
644 } else if (!strcasecmp(optname, "thread_model") && optval) {
645 if (!strcasecmp(optval, "root")) {
646 /* Root parallelization - each thread
647 * does independent search, trees are
648 * merged at the end. */
649 u->thread_model = TM_ROOT;
650 u->parallel_tree = false;
651 u->virtual_loss = false;
652 } else if (!strcasecmp(optval, "tree")) {
653 /* Tree parallelization - all threads
654 * grind on the same tree. */
655 u->thread_model = TM_TREE;
656 u->parallel_tree = true;
657 u->virtual_loss = false;
658 } else if (!strcasecmp(optval, "treevl")) {
659 /* Tree parallelization, but also
660 * with virtual losses - this discou-
661 * rages most threads choosing the
662 * same tree branches to read. */
663 u->thread_model = TM_TREEVL;
664 u->parallel_tree = true;
665 u->virtual_loss = true;
666 } else {
667 fprintf(stderr, "UCT: Invalid thread model %s\n", optval);
668 exit(1);
670 } else if (!strcasecmp(optname, "pondering")) {
671 /* Keep searching even during opponent's turn. */
672 u->pondering = !optval || atoi(optval);
673 } else if (!strcasecmp(optname, "force_seed") && optval) {
674 u->force_seed = atoi(optval);
675 } else if (!strcasecmp(optname, "no_book")) {
676 u->no_book = true;
677 } else if (!strcasecmp(optname, "dynkomi")) {
678 /* Dynamic komi in handicap game; linearly
679 * decreases to basic settings until move
680 * #optval. */
681 u->dynkomi = optval ? atoi(optval) : 150;
682 } else if (!strcasecmp(optname, "dynkomi_mask") && optval) {
683 /* Bitmask of colors the player must be
684 * for dynkomi be applied; you may want
685 * to use dynkomi_mask=3 to allow dynkomi
686 * even in games where Pachi is white. */
687 u->dynkomi_mask = atoi(optval);
688 } else if (!strcasecmp(optname, "val_scale") && optval) {
689 /* How much of the game result value should be
690 * influenced by win size. Zero means it isn't. */
691 u->val_scale = atof(optval);
692 } else if (!strcasecmp(optname, "val_points") && optval) {
693 /* Maximum size of win to be scaled into game
694 * result value. Zero means boardsize^2. */
695 u->val_points = atoi(optval) * 2; // result values are doubled
696 } else if (!strcasecmp(optname, "val_extra")) {
697 /* If false, the score coefficient will be simply
698 * added to the value, instead of scaling the result
699 * coefficient because of it. */
700 u->val_extra = !optval || atoi(optval);
701 } else if (!strcasecmp(optname, "root_heuristic") && optval) {
702 /* Whether to bias exploration by root node values
703 * (must be supported by the used policy).
704 * 0: Don't.
705 * 1: Do, value = result.
706 * Try to temper the result:
707 * 2: Do, value = 0.5+(result-expected)/2.
708 * 3: Do, value = 0.5+bzz((result-expected)^2). */
709 u->root_heuristic = atoi(optval);
710 } else if (!strcasecmp(optname, "pass_all_alive")) {
711 /* Whether to consider all stones alive at the game
712 * end instead of marking dead groupd. */
713 u->pass_all_alive = !optval || atoi(optval);
714 } else if (!strcasecmp(optname, "random_policy_chance") && optval) {
715 /* If specified (N), with probability 1/N, random_policy policy
716 * descend is used instead of main policy descend; useful
717 * if specified policy (e.g. UCB1AMAF) can make unduly biased
718 * choices sometimes, you can fall back to e.g.
719 * random_policy=UCB1. */
720 u->random_policy_chance = atoi(optval);
721 } else if (!strcasecmp(optname, "max_tree_size") && optval) {
722 /* Maximum amount of memory [MiB] consumed by the move tree.
723 * Default is 3072 (3 GiB). Note that if you use TM_ROOT,
724 * this limits size of only one of the trees, not all of them
725 * together. */
726 u->max_tree_size = atol(optval) * 1048576;
727 } else if (!strcasecmp(optname, "banner") && optval) {
728 /* Additional banner string. This must come as the
729 * last engine parameter. */
730 if (*next) *--next = ',';
731 u->banner = strdup(optval);
732 break;
733 } else {
734 fprintf(stderr, "uct: Invalid engine argument %s or missing value\n", optname);
735 exit(1);
740 u->resign_ratio = 0.2; /* Resign when most games are lost. */
741 u->loss_threshold = 0.85; /* Stop reading if after at least 5000 playouts this is best value. */
742 if (!u->policy)
743 u->policy = policy_ucb1amaf_init(u, NULL);
745 if (!!u->random_policy_chance ^ !!u->random_policy) {
746 fprintf(stderr, "uct: Only one of random_policy and random_policy_chance is set\n");
747 exit(1);
750 if (!u->prior)
751 u->prior = uct_prior_init(NULL, b);
753 if (!u->playout)
754 u->playout = playout_moggy_init(NULL);
755 u->playout->debug_level = u->debug_level;
757 u->ownermap.map = malloc(board_size2(b) * sizeof(u->ownermap.map[0]));
759 /* Some things remain uninitialized for now - the opening book
760 * is not loaded and the tree not set up. */
761 /* This will be initialized in setup_state() at the first move
762 * received/requested. This is because right now we are not aware
763 * about any komi or handicap setup and such. */
765 return u;
768 struct engine *
769 engine_uct_init(char *arg, struct board *b)
771 struct uct *u = uct_state_init(arg, b);
772 struct engine *e = calloc(1, sizeof(struct engine));
773 e->name = "UCT Engine";
774 e->printhook = uct_printhook_ownermap;
775 e->notify_play = uct_notify_play;
776 e->chat = uct_chat;
777 e->genmove = uct_genmove;
778 e->dead_group_list = uct_dead_group_list;
779 e->done = uct_done;
780 e->data = u;
782 const char banner[] = "I'm playing UCT. When I'm losing, I will resign, "
783 "if I think I win, I play until you pass. "
784 "Anyone can send me 'winrate' in private chat to get my assessment of the position.";
785 if (!u->banner) u->banner = "";
786 e->comment = malloc(sizeof(banner) + strlen(u->banner) + 1);
787 sprintf(e->comment, "%s %s", banner, u->banner);
789 return e;