uct_dead_group_list(): Stop pondering
[pachi.git] / uct / uct.c
blobd3faa22dfc7f6d061560e1de5fec67468d8de47b
1 #include <assert.h>
2 #include <pthread.h>
3 #include <signal.h>
4 #include <stdio.h>
5 #include <stdlib.h>
6 #include <string.h>
8 #define DEBUG
10 #include "debug.h"
11 #include "board.h"
12 #include "gtp.h"
13 #include "move.h"
14 #include "mq.h"
15 #include "playout.h"
16 #include "playout/elo.h"
17 #include "playout/moggy.h"
18 #include "playout/light.h"
19 #include "random.h"
20 #include "tactics.h"
21 #include "uct/internal.h"
22 #include "uct/prior.h"
23 #include "uct/tree.h"
24 #include "uct/uct.h"
25 #include "uct/walk.h"
/* Tree policy constructors; they are defined outside this file and are
 * selected by the "policy"/"random_policy" engine options below. */
27 struct uct_policy *policy_ucb1_init(struct uct *u, char *arg);
28 struct uct_policy *policy_ucb1amaf_init(struct uct *u, char *arg);
/* Forward declaration: used by the GTP hooks before its definition. */
29 static void uct_pondering_finish(struct uct *u);
/* Default number of simulations per move (initial value of u->games). */
32 #define MC_GAMES 80000
/* Default maximal playout length (initial value of u->gamelen). */
33 #define MC_GAMELEN MAX_GAMELEN
35 /* Minimum proportion of ownermap counts that must belong to one color
 * for the point to be considered settled. */
37 #define GJ_THRES 0.8
38 /* Minimum number of playouts in the ownermap before groups are judged. */
39 #define GJ_MINGAMES 500
42 static void
43 setup_state(struct uct *u, struct board *b, enum stone color)
45 u->t = tree_init(b, color);
46 if (u->force_seed)
47 fast_srandom(u->force_seed);
48 if (UDEBUGL(0))
49 fprintf(stderr, "Fresh board with random seed %lu\n", fast_getseed());
50 //board_print(b, stderr);
51 if (!u->no_book && b->moves == 0) {
52 assert(color == S_BLACK);
53 tree_load(u->t, b);
57 static void
58 reset_state(struct uct *u)
60 assert(u->t);
61 tree_done(u->t); u->t = NULL;
64 static void
65 prepare_move(struct engine *e, struct board *b, enum stone color)
67 struct uct *u = e->data;
69 if (u->t) {
70 /* Verify that we have sane state. */
71 assert(b->es == u);
72 assert(u->t && b->moves);
73 if (color != stone_other(u->t->root_color)) {
74 fprintf(stderr, "Fatal: Non-alternating play detected %d %d\n",
75 color, u->t->root_color);
76 exit(1);
79 } else {
80 /* We need fresh state. */
81 b->es = u;
82 setup_state(u, b, color);
85 if (u->dynkomi && u->dynkomi > b->moves && (color & u->dynkomi_mask))
86 u->t->extra_komi = uct_get_extra_komi(u, b);
88 u->ownermap.playouts = 0;
89 memset(u->ownermap.map, 0, board_size2(b) * sizeof(u->ownermap.map[0]));
92 static void
93 dead_group_list(struct uct *u, struct board *b, struct move_queue *mq)
95 struct group_judgement gj;
96 gj.thres = GJ_THRES;
97 gj.gs = alloca(board_size2(b) * sizeof(gj.gs[0]));
98 board_ownermap_judge_group(b, &u->ownermap, &gj);
99 groups_of_status(b, &gj, GS_DEAD, mq);
102 bool
103 uct_pass_is_safe(struct uct *u, struct board *b, enum stone color, bool pass_all_alive)
105 if (u->ownermap.playouts < GJ_MINGAMES)
106 return false;
108 struct move_queue mq = { .moves = 0 };
109 if (!pass_all_alive)
110 dead_group_list(u, b, &mq);
111 return pass_is_safe(b, color, &mq);
115 static void
116 uct_printhook_ownermap(struct board *board, coord_t c, FILE *f)
118 struct uct *u = board->es;
119 assert(u);
120 const char chr[] = ":XO,"; // dame, black, white, unclear
121 const char chm[] = ":xo,";
122 char ch = chr[board_ownermap_judge_point(&u->ownermap, c, GJ_THRES)];
123 if (ch == ',') { // less precise estimate then?
124 ch = chm[board_ownermap_judge_point(&u->ownermap, c, 0.67)];
126 fprintf(f, "%c ", ch);
129 static char *
130 uct_notify_play(struct engine *e, struct board *b, struct move *m)
132 struct uct *u = e->data;
133 if (!u->t) {
134 /* No state, create one - this is probably game beginning
135 * and we need to load the opening book right now. */
136 prepare_move(e, b, m->color);
137 assert(u->t);
140 /* Stop pondering. */
141 /* XXX: If we are about to receive multiple 'play' commands,
142 * e.g. in a rengo, we will not ponder during the rest of them. */
143 uct_pondering_finish(u);
145 if (is_resign(m->coord)) {
146 /* Reset state. */
147 reset_state(u);
148 return NULL;
151 /* Promote node of the appropriate move to the tree root. */
152 assert(u->t->root);
153 if (!tree_promote_at(u->t, b, m->coord)) {
154 if (UDEBUGL(0))
155 fprintf(stderr, "Warning: Cannot promote move node! Several play commands in row?\n");
156 reset_state(u);
157 return NULL;
160 return NULL;
163 static char *
164 uct_chat(struct engine *e, struct board *b, char *cmd)
166 struct uct *u = e->data;
167 static char reply[1024];
169 cmd += strspn(cmd, " \n\t");
170 if (!strncasecmp(cmd, "winrate", 7)) {
171 if (!u->t)
172 return "no game context (yet?)";
173 enum stone color = u->t->root_color;
174 struct tree_node *n = u->t->root;
175 snprintf(reply, 1024, "In %d*%d playouts, %s %s can win with %.2f%% probability",
176 n->u.playouts, u->threads, stone2str(color), coord2sstr(n->coord, b),
177 tree_node_get_value(u->t, -1, n->u.value) * 100);
178 if (abs(u->t->extra_komi) >= 0.5) {
179 sprintf(reply + strlen(reply), ", while self-imposing extra komi %.1f",
180 u->t->extra_komi);
182 strcat(reply, ".");
183 return reply;
185 return NULL;
188 static void
189 uct_dead_group_list(struct engine *e, struct board *b, struct move_queue *mq)
191 struct uct *u = e->data;
193 /* This means the game is probabl yover, no use pondering on. */
194 uct_pondering_finish(u);
196 if (u->pass_all_alive)
197 return; // no dead groups
199 bool mock_state = false;
201 if (!u->t) {
202 /* No state, but we cannot just back out - we might
203 * have passed earlier, only assuming some stones are
204 * dead, and then re-connected, only to lose counting
205 * when all stones are assumed alive. */
206 /* Mock up some state and seed the ownermap by few
207 * simulations. */
208 prepare_move(e, b, S_BLACK); assert(u->t);
209 for (int i = 0; i < GJ_MINGAMES; i++)
210 uct_playout(u, b, S_BLACK, u->t);
211 mock_state = true;
214 dead_group_list(u, b, mq);
216 if (mock_state) {
217 /* Clean up the mock state in case we will receive
218 * a genmove; we could get a non-alternating-move
219 * error from prepare_move() in that case otherwise. */
220 reset_state(u);
224 static void
225 playout_policy_done(struct playout_policy *p)
227 if (p->done) p->done(p);
228 if (p->data) free(p->data);
229 free(p);
232 static void
233 uct_done(struct engine *e)
235 /* This is called on engine reset, especially when clear_board
236 * is received and new game should begin. */
237 struct uct *u = e->data;
238 uct_pondering_finish(u);
239 if (u->t) reset_state(u);
240 free(u->ownermap.map);
242 free(u->policy);
243 free(u->random_policy);
244 playout_policy_done(u->playout);
245 uct_prior_done(u->prior);
249 /* Pachi threading structure (if uct_playouts_threaded() is used):
251 * main thread
252 * | main(), GTP communication, ...
254 * thread_manager
255 * | spawns and manages worker threads
257 * worker0
258 * worker1
259 * ...
260 * workerK
261 * uct_playouts() loop, doing descend-playout N=games times
264 /* Set in thread manager in case the workers should stop; cleared again
 * when the manager starts spawning a new batch of workers. */
265 volatile sig_atomic_t uct_halt = 0;
266 /* ID of the running worker thread (thread-local); -1 outside workers. */
267 __thread int thread_id = -1;
268 /* ID of the thread manager. */
269 static pthread_t thread_manager;
/* True from uct_pondering_start() until uct_pondering_stop(). */
270 static bool thread_manager_running;
/* finish_mutex + finish_cond: the manager waits on them for events. */
272 static pthread_mutex_t finish_mutex = PTHREAD_MUTEX_INITIALIZER;
273 static pthread_cond_t finish_cond = PTHREAD_COND_INITIALIZER;
/* Event payload: tid of the worker that just finished, or -1 when
 * uct_pondering_finish() asks the manager to stop the workers. */
274 static volatile int finish_thread;
/* Taken by a finishing worker and released by the manager once that
 * worker is collected, so only one worker reports at a time. */
275 static pthread_mutex_t finish_serializer = PTHREAD_MUTEX_INITIALIZER;
277 struct spawn_ctx {
278 int tid;
279 struct uct *u;
280 struct board *b;
281 enum stone color;
282 struct tree *t;
283 unsigned long seed;
284 int games;
287 static void *
288 spawn_worker(void *ctx_)
290 struct spawn_ctx *ctx = ctx_;
291 /* Setup */
292 fast_srandom(ctx->seed);
293 thread_id = ctx->tid;
294 /* Run */
295 ctx->games = uct_playouts(ctx->u, ctx->b, ctx->color, ctx->t, ctx->games);
296 /* Finish */
297 pthread_mutex_lock(&finish_serializer);
298 pthread_mutex_lock(&finish_mutex);
299 finish_thread = ctx->tid;
300 pthread_cond_signal(&finish_cond);
301 pthread_mutex_unlock(&finish_mutex);
302 return ctx;
305 /* Thread manager, controlling worker threads. It must be called with
 * finish_mutex lock held, and the finish_cond can be signalled for it
 * to stop; in that case, the caller should set finish_thread = -1.
 *
 * @ctx_ is a struct spawn_ctx; the same pointer is returned with ->games
 * set to the total number of games played by all workers.  Spawns
 * u->threads workers, then joins them one by one as they report.
 *
 * NOTE(review): the mutex is locked by the caller's thread but waited
 * on here; POSIX requires pthread_cond_wait() to be called by the thread
 * owning the mutex - confirm this is acceptable on target platforms.
 * NOTE(review): the wait loop has no predicate guarding against spurious
 * wakeups, so a stale finish_thread could be joined again - verify. */
308 static void *
309 spawn_thread_manager(void *ctx_)
311 /* In thread_manager, we use only some of the ctx fields. */
312 struct spawn_ctx *mctx = ctx_;
313 struct uct *u = mctx->u;
314 bool shared_tree = u->parallel_tree;
315 fast_srandom(mctx->seed);
317 int played_games = 0;
318 pthread_t threads[u->threads];
319 int joined = 0;
321 uct_halt = 0;
322 /* Spawn threads... */
323 for (int ti = 0; ti < u->threads; ti++) {
/* NOTE(review): the malloc() result is used unchecked. */
324 struct spawn_ctx *ctx = malloc(sizeof(*ctx));
325 ctx->u = u; ctx->b = mctx->b; ctx->color = mctx->color;
/* Without a shared tree, every worker searches its own copy. */
326 ctx->t = shared_tree ? mctx->t : tree_copy(mctx->t);
327 ctx->tid = ti; ctx->games = mctx->games;
328 ctx->seed = fast_random(65536) + ti;
329 pthread_create(&threads[ti], NULL, spawn_worker, ctx);
330 if (UDEBUGL(2))
331 fprintf(stderr, "Spawned worker %d\n", ti);
334 /* ...and collect them back: */
335 while (joined < u->threads) {
336 /* Wait for some thread to finish... */
337 pthread_cond_wait(&finish_cond, &finish_mutex);
338 if (finish_thread < 0) {
339 /* Stop-by-caller. Tell the workers to wrap up. */
340 uct_halt = 1;
341 continue;
343 /* ...and gather its remnants. */
344 struct spawn_ctx *ctx;
345 pthread_join(threads[finish_thread], (void **) &ctx);
346 played_games += ctx->games;
347 joined++;
/* Fold the worker's private tree into the main one and drop it. */
348 if (!shared_tree) {
349 tree_merge(mctx->t, ctx->t);
350 tree_done(ctx->t);
352 free(ctx);
353 if (UDEBUGL(2))
354 fprintf(stderr, "Joined worker %d\n", finish_thread);
355 /* Do not get stalled by slow threads: once half of the workers
 * are collected, the remaining ones are told to halt. */
356 if (joined >= u->threads / 2)
357 uct_halt = 1;
/* Let the next finished worker (blocked in spawn_worker()) report. */
358 pthread_mutex_unlock(&finish_serializer);
361 if (!shared_tree)
362 tree_normalize(mctx->t, u->threads);
364 mctx->games = played_games;
365 return mctx;
368 static void
369 uct_pondering_start(struct uct *u, struct board *b0, enum stone color, struct tree *t, int games)
371 assert(u->threads > 0);
372 assert(!thread_manager_running);
374 /* *b0 can change in the meantime. */
375 struct board b; board_copy(&b, b0);
377 struct spawn_ctx ctx = { .u = u, .b = &b, .color = color, .t = t, .games = games, .seed = fast_random(65536) };
378 static struct spawn_ctx mctx; mctx = ctx;
379 pthread_mutex_lock(&finish_mutex);
380 pthread_create(&thread_manager, NULL, spawn_thread_manager, &mctx);
381 thread_manager_running = true;
384 static int
385 uct_pondering_stop(void)
387 assert(thread_manager_running);
389 struct spawn_ctx *pctx;
390 thread_manager_running = false;
391 pthread_join(thread_manager, (void **) &pctx);
392 pthread_mutex_unlock(&finish_mutex);
393 return pctx->games;
396 /* uct_pondering_stop() frontend for the pondering (non-genmove) mode. */
397 static void
398 uct_pondering_finish(struct uct *u)
400 if (!thread_manager_running)
401 return;
403 /* Signal thread manager to stop the workers. */
404 pthread_mutex_lock(&finish_mutex);
405 finish_thread = -1;
406 pthread_cond_signal(&finish_cond);
407 pthread_mutex_unlock(&finish_mutex);
409 /* Collect thread manager. */
410 int games = uct_pondering_stop();
411 if (UDEBUGL(1))
412 fprintf(stderr, "Pondering yielded %d games\n", games);
415 static int
416 uct_playouts_threaded(struct uct *u, struct board *b, enum stone color, struct tree *t, int games)
418 uct_pondering_start(u, b, color, t, games);
419 /* We just wait until the thread manager finishes. */
420 return uct_pondering_stop();
/* Engine hook generating a move for @color on @b.
 * Returns a coord_copy() of the chosen coordinate: the best tree move,
 * pass, or resign.  @pass_all_alive is OR-ed with u->pass_all_alive
 * for the pass-safety check.  The search state is dropped when no move
 * could be chosen or when resigning. */
424 static coord_t *
425 uct_genmove(struct engine *e, struct board *b, enum stone color, bool pass_all_alive)
427 struct uct *u = e->data;
/* Only warn about a superko violation and clear the flag. */
429 if (b->superko_violation) {
430 fprintf(stderr, "!!! WARNING: SUPERKO VIOLATION OCCURED BEFORE THIS MOVE\n");
431 fprintf(stderr, "Maybe you play with situational instead of positional superko?\n");
432 fprintf(stderr, "I'm going to ignore the violation, but note that I may miss\n");
433 fprintf(stderr, "some moves valid under this ruleset because of this.\n");
434 b->superko_violation = false;
437 /* Seed the tree. */
438 uct_pondering_finish(u);
439 prepare_move(e, b, color);
440 assert(u->t);
442 /* Determine number of simulations: when the root already has
 * children, two thirds of its playouts (per thread with a shared
 * tree) are deducted from the budget. */
443 int games = u->games;
444 if (u->t->root->children) {
445 int delta = u->t->root->u.playouts * 2 / 3;
446 if (u->parallel_tree) delta /= u->threads;
447 games -= delta;
449 /* else this is highly read-out but dead-end branch of opening book;
450 * we need to start from scratch; XXX: Maybe actually base the readout
451 * count based on number of playouts of best node? */
452 if (games < u->games && UDEBUGL(2))
453 fprintf(stderr, "<pre-simulated %d games skipped>\n", u->games - games);
455 /* Perform the Monte Carlo Tree Search! */
456 int played_games = uct_playouts_threaded(u, b, color, u->t, games);
458 if (UDEBUGL(2))
459 tree_dump(u->t, u->dumpthres);
461 /* Choose the best move from the tree; pass if there is none. */
462 struct tree_node *best = u->policy->choose(u->policy, u->t->root, b, color);
463 if (!best) {
464 reset_state(u);
465 return coord_copy(pass);
467 if (UDEBUGL(0)) {
468 uct_progress_status(u, u->t, color, played_games);
470 if (UDEBUGL(1))
471 fprintf(stderr, "*** WINNER is %s (%d,%d) with score %1.4f (%d/%d:%d games)\n",
472 coord2sstr(best->coord, b), coord_x(best->coord, b), coord_y(best->coord, b),
473 tree_node_get_value(u->t, 1, best->u.value),
474 best->u.playouts, u->t->root->u.playouts, played_games);
/* Resign when the best non-pass move is valued below u->resign_ratio. */
475 if (tree_node_get_value(u->t, 1, best->u.value) < u->resign_ratio && !is_pass(best->coord)) {
476 reset_state(u);
477 return coord_copy(resign);
480 /* If the opponent just passed and we win counting, always
481 * pass as well. */
482 if (b->moves > 1 && is_pass(b->last_move.coord)) {
483 /* Make sure enough playouts are simulated. */
484 while (u->ownermap.playouts < GJ_MINGAMES)
485 uct_playout(u, b, color, u->t);
486 if (uct_pass_is_safe(u, b, color, u->pass_all_alive || pass_all_alive)) {
487 if (UDEBUGL(0))
488 fprintf(stderr, "<Will rather pass, looks safe enough.>\n");
/* Note that this overwrites the coordinate stored in the tree node. */
489 best->coord = pass;
/* Make the chosen node the new root; with pondering enabled, keep
 * searching for the opponent's color (the game budget passed is 0). */
493 tree_promote_node(u->t, best);
494 if (u->pondering) {
495 if (UDEBUGL(1))
496 fprintf(stderr, "Starting to ponder with color %s\n", stone2str(stone_other(color)));
497 uct_pondering_start(u, b, stone_other(color), u->t, 0);
499 return coord_copy(best->coord);
503 bool
504 uct_genbook(struct engine *e, struct board *b, enum stone color)
506 struct uct *u = e->data;
507 if (!u->t) prepare_move(e, b, color);
508 assert(u->t);
510 uct_playouts_threaded(u, b, color, u->t, u->games);
512 tree_save(u->t, b, u->games / 100);
514 return true;
517 void
518 uct_dumpbook(struct engine *e, struct board *b, enum stone color)
520 struct tree *t = tree_init(b, color);
521 tree_load(t, b);
522 tree_dump(t, 0);
523 tree_done(t);
/* Allocate the UCT engine state, fill in the defaults and apply the
 * engine options from @arg.
 *
 * @arg is either NULL or a comma-separated list of "name" / "name=value"
 * options; it is parsed in place (separators are overwritten by NUL).
 * An unknown option, an option missing its mandatory value or an invalid
 * policy/playout/thread model name terminates the process via exit(1).
 * @b is used for the board size (dynkomi default, ownermap size) and is
 * passed to uct_prior_init().
 *
 * Returns the new state; the tree and the opening book are set up later.
 *
 * NOTE(review): the calloc()/malloc() results are used unchecked and the
 * atoi()/atol()/atof() conversions do not report malformed numbers. */
527 struct uct *
528 uct_state_init(char *arg, struct board *b)
530 struct uct *u = calloc(1, sizeof(struct uct));
/* Defaults; anything not mentioned stays zeroed by calloc(). */
532 u->debug_level = 1;
533 u->games = MC_GAMES;
534 u->gamelen = MC_GAMELEN;
535 u->mercymin = 0;
536 u->expand_p = 2;
537 u->dumpthres = 1000;
538 u->playout_amaf = true;
539 u->playout_amaf_nakade = false;
540 u->amaf_prior = false;
541 u->max_tree_size = 3072ULL * 1048576;
/* Dynamic komi is on by default only for large boards.
 * presumably board_size() includes a two-point border, i.e. this
 * means 19x19 and larger - TODO confirm against board.h */
543 if (board_size(b) - 2 >= 19)
544 u->dynkomi = 200;
545 u->dynkomi_mask = S_BLACK;
547 u->threads = 1;
548 u->thread_model = TM_TREEVL;
549 u->parallel_tree = true;
550 u->virtual_loss = true;
552 u->val_scale = 0.02; u->val_points = 20;
554 if (arg) {
555 char *optspec, *next = arg;
556 while (*next) {
/* Cut the next comma-terminated option off the list... */
557 optspec = next;
558 next += strcspn(next, ",");
559 if (*next) { *next++ = 0; } else { *next = 0; }
/* ...and split it into name and optional value at '='. */
561 char *optname = optspec;
562 char *optval = strchr(optspec, '=');
563 if (optval) *optval++ = 0;
565 if (!strcasecmp(optname, "debug")) {
/* "debug=N" sets the level, plain "debug" raises it by one. */
566 if (optval)
567 u->debug_level = atoi(optval);
568 else
569 u->debug_level++;
570 } else if (!strcasecmp(optname, "games") && optval) {
571 u->games = atoi(optval);
572 } else if (!strcasecmp(optname, "mercy") && optval) {
573 /* Minimal difference of black/white captures
574 * to stop playout - "Mercy Rule". Speeds up
575 * hopeless playouts at the expense of some
576 * accuracy. */
577 u->mercymin = atoi(optval);
578 } else if (!strcasecmp(optname, "gamelen") && optval) {
579 u->gamelen = atoi(optval);
580 } else if (!strcasecmp(optname, "expand_p") && optval) {
581 u->expand_p = atoi(optval);
582 } else if (!strcasecmp(optname, "dumpthres") && optval) {
583 u->dumpthres = atoi(optval);
584 } else if (!strcasecmp(optname, "playout_amaf")) {
585 /* Whether to include random playout moves in
586 * AMAF as well. (Otherwise, only tree moves
587 * are included in AMAF. Of course makes sense
588 * only in connection with an AMAF policy.) */
589 /* with-without: 55.5% (+-4.1) */
590 if (optval && *optval == '0')
591 u->playout_amaf = false;
592 else
593 u->playout_amaf = true;
594 } else if (!strcasecmp(optname, "playout_amaf_nakade")) {
595 /* Whether to include nakade moves from playouts
596 * in the AMAF statistics; this tends to nullify
597 * the playout_amaf effect by adding too much
598 * noise. */
599 if (optval && *optval == '0')
600 u->playout_amaf_nakade = false;
601 else
602 u->playout_amaf_nakade = true;
603 } else if (!strcasecmp(optname, "playout_amaf_cutoff") && optval) {
604 /* Keep only first N% of playout stage AMAF
605 * information. */
606 u->playout_amaf_cutoff = atoi(optval);
607 } else if ((!strcasecmp(optname, "policy") || !strcasecmp(optname, "random_policy")) && optval) {
/* Value format is "name[:policy-arguments]"; both options share
 * the parsing and differ only in the slot that gets filled. */
608 char *policyarg = strchr(optval, ':');
609 struct uct_policy **p = !strcasecmp(optname, "policy") ? &u->policy : &u->random_policy;
610 if (policyarg)
611 *policyarg++ = 0;
612 if (!strcasecmp(optval, "ucb1")) {
613 *p = policy_ucb1_init(u, policyarg);
614 } else if (!strcasecmp(optval, "ucb1amaf")) {
615 *p = policy_ucb1amaf_init(u, policyarg);
616 } else {
617 fprintf(stderr, "UCT: Invalid tree policy %s\n", optval);
618 exit(1);
620 } else if (!strcasecmp(optname, "playout") && optval) {
/* Value format is "name[:playout-arguments]". */
621 char *playoutarg = strchr(optval, ':');
622 if (playoutarg)
623 *playoutarg++ = 0;
624 if (!strcasecmp(optval, "moggy")) {
625 u->playout = playout_moggy_init(playoutarg);
626 } else if (!strcasecmp(optval, "light")) {
627 u->playout = playout_light_init(playoutarg);
628 } else if (!strcasecmp(optval, "elo")) {
629 u->playout = playout_elo_init(playoutarg);
630 } else {
631 fprintf(stderr, "UCT: Invalid playout policy %s\n", optval);
632 exit(1);
634 } else if (!strcasecmp(optname, "prior") && optval) {
635 u->prior = uct_prior_init(optval, b);
636 } else if (!strcasecmp(optname, "amaf_prior") && optval) {
637 u->amaf_prior = atoi(optval);
638 } else if (!strcasecmp(optname, "threads") && optval) {
639 /* By default, Pachi will run with only single
640 * tree search thread! */
/* NOTE(review): the value is not validated here, while
 * uct_pondering_start() asserts u->threads > 0. */
641 u->threads = atoi(optval);
642 } else if (!strcasecmp(optname, "thread_model") && optval) {
643 if (!strcasecmp(optval, "root")) {
644 /* Root parallelization - each thread
645 * does independent search, trees are
646 * merged at the end. */
647 u->thread_model = TM_ROOT;
648 u->parallel_tree = false;
649 u->virtual_loss = false;
650 } else if (!strcasecmp(optval, "tree")) {
651 /* Tree parallelization - all threads
652 * grind on the same tree. */
653 u->thread_model = TM_TREE;
654 u->parallel_tree = true;
655 u->virtual_loss = false;
656 } else if (!strcasecmp(optval, "treevl")) {
657 /* Tree parallelization, but also
658 * with virtual losses - this discou-
659 * rages most threads choosing the
660 * same tree branches to read. */
661 u->thread_model = TM_TREEVL;
662 u->parallel_tree = true;
663 u->virtual_loss = true;
664 } else {
665 fprintf(stderr, "UCT: Invalid thread model %s\n", optval);
666 exit(1);
668 } else if (!strcasecmp(optname, "pondering")) {
669 /* Keep searching even during opponent's turn. */
670 u->pondering = !optval || atoi(optval);
671 } else if (!strcasecmp(optname, "force_seed") && optval) {
672 u->force_seed = atoi(optval);
673 } else if (!strcasecmp(optname, "no_book")) {
674 u->no_book = true;
675 } else if (!strcasecmp(optname, "dynkomi")) {
676 /* Dynamic komi in handicap game; linearly
677 * decreases to basic settings until move
678 * #optval. */
679 u->dynkomi = optval ? atoi(optval) : 150;
680 } else if (!strcasecmp(optname, "dynkomi_mask") && optval) {
681 /* Bitmask of colors the player must be
682 * for dynkomi be applied; you may want
683 * to use dynkomi_mask=3 to allow dynkomi
684 * even in games where Pachi is white. */
685 u->dynkomi_mask = atoi(optval);
686 } else if (!strcasecmp(optname, "val_scale") && optval) {
687 /* How much of the game result value should be
688 * influenced by win size. Zero means it isn't. */
689 u->val_scale = atof(optval);
690 } else if (!strcasecmp(optname, "val_points") && optval) {
691 /* Maximum size of win to be scaled into game
692 * result value. Zero means boardsize^2. */
693 u->val_points = atoi(optval) * 2; // result values are doubled
694 } else if (!strcasecmp(optname, "val_extra")) {
695 /* If false, the score coefficient will be simply
696 * added to the value, instead of scaling the result
697 * coefficient because of it. */
698 u->val_extra = !optval || atoi(optval);
699 } else if (!strcasecmp(optname, "root_heuristic") && optval) {
700 /* Whether to bias exploration by root node values
701 * (must be supported by the used policy).
702 * 0: Don't.
703 * 1: Do, value = result.
704 * Try to temper the result:
705 * 2: Do, value = 0.5+(result-expected)/2.
706 * 3: Do, value = 0.5+bzz((result-expected)^2). */
707 u->root_heuristic = atoi(optval);
708 } else if (!strcasecmp(optname, "pass_all_alive")) {
709 /* Whether to consider all stones alive at the game
 * end instead of marking dead groups. */
711 u->pass_all_alive = !optval || atoi(optval);
712 } else if (!strcasecmp(optname, "random_policy_chance") && optval) {
713 /* If specified (N), with probability 1/N, random_policy policy
714 * descend is used instead of main policy descend; useful
715 * if specified policy (e.g. UCB1AMAF) can make unduly biased
716 * choices sometimes, you can fall back to e.g.
717 * random_policy=UCB1. */
718 u->random_policy_chance = atoi(optval);
719 } else if (!strcasecmp(optname, "max_tree_size") && optval) {
720 /* Maximum amount of memory [MiB] consumed by the move tree.
721 * Default is 3072 (3 GiB). Note that if you use TM_ROOT,
722 * this limits size of only one of the trees, not all of them
723 * together. */
724 u->max_tree_size = atol(optval) * 1048576;
725 } else if (!strcasecmp(optname, "banner") && optval) {
726 /* Additional banner string. This must come as the
727 * last engine parameter. */
/* Put back the comma cut off above, so that the banner extends
 * over the whole rest of the argument string. */
728 if (*next) *--next = ',';
729 u->banner = strdup(optval);
730 break;
731 } else {
732 fprintf(stderr, "uct: Invalid engine argument %s or missing value\n", optname);
733 exit(1);
738 u->resign_ratio = 0.2; /* Resign when most games are lost. */
739 u->loss_threshold = 0.85; /* Stop reading if after at least 5000 playouts this is best value. */
/* Fill in whatever the options did not choose. */
740 if (!u->policy)
741 u->policy = policy_ucb1amaf_init(u, NULL);
/* random_policy and random_policy_chance must be given together. */
743 if (!!u->random_policy_chance ^ !!u->random_policy) {
744 fprintf(stderr, "uct: Only one of random_policy and random_policy_chance is set\n");
745 exit(1);
748 if (!u->prior)
749 u->prior = uct_prior_init(NULL, b);
751 if (!u->playout)
752 u->playout = playout_moggy_init(NULL);
753 u->playout->debug_level = u->debug_level;
/* One ownermap entry per board_size2(b) point; the map is cleared
 * in prepare_move(). */
755 u->ownermap.map = malloc(board_size2(b) * sizeof(u->ownermap.map[0]));
757 /* Some things remain uninitialized for now - the opening book
758 * is not loaded and the tree not set up. */
759 /* This will be initialized in setup_state() at the first move
760 * received/requested. This is because right now we are not aware
761 * about any komi or handicap setup and such. */
763 return u;
766 struct engine *
767 engine_uct_init(char *arg, struct board *b)
769 struct uct *u = uct_state_init(arg, b);
770 struct engine *e = calloc(1, sizeof(struct engine));
771 e->name = "UCT Engine";
772 e->printhook = uct_printhook_ownermap;
773 e->notify_play = uct_notify_play;
774 e->chat = uct_chat;
775 e->genmove = uct_genmove;
776 e->dead_group_list = uct_dead_group_list;
777 e->done = uct_done;
778 e->data = u;
780 const char banner[] = "I'm playing UCT. When I'm losing, I will resign, "
781 "if I think I win, I play until you pass. "
782 "Anyone can send me 'winrate' in private chat to get my assessment of the position.";
783 if (!u->banner) u->banner = "";
784 e->comment = malloc(sizeof(banner) + strlen(u->banner) + 1);
785 sprintf(e->comment, "%s %s", banner, u->banner);
787 return e;