uct_search(): Move state to struct uct_search_state, setup in uct_search_start()
[pachi/derm.git] / uct / uct.c
blobbdfefa19fb305f15630ab4261352552d030dc51a
1 #include <assert.h>
2 #include <math.h>
3 #include <pthread.h>
4 #include <signal.h>
5 #include <stdio.h>
6 #include <stdlib.h>
7 #include <string.h>
8 #include <time.h>
10 #define DEBUG
12 #include "debug.h"
13 #include "board.h"
14 #include "gtp.h"
15 #include "move.h"
16 #include "mq.h"
17 #include "playout.h"
18 #include "playout/elo.h"
19 #include "playout/moggy.h"
20 #include "playout/light.h"
21 #include "random.h"
22 #include "tactics.h"
23 #include "timeinfo.h"
24 #include "distributed/distributed.h"
25 #include "uct/dynkomi.h"
26 #include "uct/internal.h"
27 #include "uct/prior.h"
28 #include "uct/slave.h"
29 #include "uct/tree.h"
30 #include "uct/uct.h"
31 #include "uct/walk.h"
33 struct uct_policy *policy_ucb1_init(struct uct *u, char *arg);
34 struct uct_policy *policy_ucb1amaf_init(struct uct *u, char *arg);
35 static void uct_pondering_stop(struct uct *u);
36 static void uct_pondering_start(struct uct *u, struct board *b0, struct tree *t, enum stone color);
38 /* Default number of simulations to perform per move.
39 * Note that this is now in total over all threads! (Unless TM_ROOT.) */
40 #define MC_GAMES 80000
41 #define MC_GAMELEN MAX_GAMELEN
42 static const struct time_info default_ti = {
43 .period = TT_MOVE,
44 .dim = TD_GAMES,
45 .len = { .games = MC_GAMES },
48 /* How big proportion of ownermap counts must be of one color to consider
49 * the point sure. */
50 #define GJ_THRES 0.8
51 /* How many games to consider at minimum before judging groups. */
52 #define GJ_MINGAMES 500
54 /* How often to inspect the tree from the main thread to check for playout
55 * stop, progress reports, etc. (in seconds) */
56 #define TREE_BUSYWAIT_INTERVAL 0.1 /* 100ms */
58 /* Once per how many simulations (per thread) to show a progress report line. */
59 #define TREE_SIMPROGRESS_INTERVAL 10000
61 /* How often to send stats updates for the distributed engine (in seconds). */
62 #define STATS_SEND_INTERVAL 0.5
64 /* When terminating uct_search() early, the safety margin to add to the
65 * remaining playout number estimate when deciding whether the result can
66 * still change. */
67 #define PLAYOUT_DELTA_SAFEMARGIN 1000
70 static void
71 setup_state(struct uct *u, struct board *b, enum stone color)
73 u->t = tree_init(b, color, u->fast_alloc ? u->max_tree_size : 0, u->local_tree_aging);
74 if (u->force_seed)
75 fast_srandom(u->force_seed);
76 if (UDEBUGL(0))
77 fprintf(stderr, "Fresh board with random seed %lu\n", fast_getseed());
78 //board_print(b, stderr);
79 if (!u->no_book && b->moves == 0) {
80 assert(color == S_BLACK);
81 tree_load(u->t, b);
85 static void
86 reset_state(struct uct *u)
88 assert(u->t);
89 tree_done(u->t); u->t = NULL;
92 static void
93 setup_dynkomi(struct uct *u, struct board *b, enum stone to_play)
95 if (u->t->use_extra_komi && u->dynkomi->permove)
96 u->t->extra_komi = u->dynkomi->permove(u->dynkomi, b, u->t);
99 void
100 uct_prepare_move(struct uct *u, struct board *b, enum stone color)
102 if (u->t) {
103 /* Verify that we have sane state. */
104 assert(b->es == u);
105 assert(u->t && b->moves);
106 if (color != stone_other(u->t->root_color)) {
107 fprintf(stderr, "Fatal: Non-alternating play detected %d %d\n",
108 color, u->t->root_color);
109 exit(1);
112 } else {
113 /* We need fresh state. */
114 b->es = u;
115 setup_state(u, b, color);
118 u->ownermap.playouts = 0;
119 memset(u->ownermap.map, 0, board_size2(b) * sizeof(u->ownermap.map[0]));
120 memset(u->stats, 0, board_size2(b) * sizeof(u->stats[0]));
121 u->played_own = u->played_all = 0;
124 static void
125 dead_group_list(struct uct *u, struct board *b, struct move_queue *mq)
127 struct group_judgement gj;
128 gj.thres = GJ_THRES;
129 gj.gs = alloca(board_size2(b) * sizeof(gj.gs[0]));
130 board_ownermap_judge_group(b, &u->ownermap, &gj);
131 groups_of_status(b, &gj, GS_DEAD, mq);
134 bool
135 uct_pass_is_safe(struct uct *u, struct board *b, enum stone color, bool pass_all_alive)
137 if (u->ownermap.playouts < GJ_MINGAMES)
138 return false;
140 struct move_queue mq = { .moves = 0 };
141 dead_group_list(u, b, &mq);
142 if (pass_all_alive && mq.moves > 0)
143 return false; // We need to remove some dead groups first.
144 return pass_is_safe(b, color, &mq);
147 /* This function is called only when running as slave in the distributed version. */
148 static enum parse_code
149 uct_notify(struct engine *e, struct board *b, int id, char *cmd, char *args, char **reply)
151 struct uct *u = e->data;
153 static bool board_resized = false;
154 board_resized |= is_gamestart(cmd);
156 /* Force resending the whole command history if we are out of sync
157 * but do it only once, not if already getting the history. */
158 if ((move_number(id) != b->moves || !board_resized)
159 && !reply_disabled(id) && !is_reset(cmd)) {
160 if (UDEBUGL(0))
161 fprintf(stderr, "Out of sync, id %d, move %d\n", id, b->moves);
162 static char buf[128];
163 snprintf(buf, sizeof(buf), "out of sync, move %d expected", b->moves);
164 *reply = buf;
165 return P_DONE_ERROR;
167 return reply_disabled(id) ? P_NOREPLY : P_OK;
170 static char *
171 uct_printhook_ownermap(struct board *board, coord_t c, char *s, char *end)
173 struct uct *u = board->es;
174 assert(u);
175 const char chr[] = ":XO,"; // dame, black, white, unclear
176 const char chm[] = ":xo,";
177 char ch = chr[board_ownermap_judge_point(&u->ownermap, c, GJ_THRES)];
178 if (ch == ',') { // less precise estimate then?
179 ch = chm[board_ownermap_judge_point(&u->ownermap, c, 0.67)];
181 s += snprintf(s, end - s, "%c ", ch);
182 return s;
185 static char *
186 uct_notify_play(struct engine *e, struct board *b, struct move *m)
188 struct uct *u = e->data;
189 if (!u->t) {
190 /* No state, create one - this is probably game beginning
191 * and we need to load the opening book right now. */
192 uct_prepare_move(u, b, m->color);
193 assert(u->t);
196 /* Stop pondering, required by tree_promote_at() */
197 uct_pondering_stop(u);
198 if (UDEBUGL(2) && u->slave)
199 tree_dump(u->t, u->dumpthres);
201 if (is_resign(m->coord)) {
202 /* Reset state. */
203 reset_state(u);
204 return NULL;
207 /* Promote node of the appropriate move to the tree root. */
208 assert(u->t->root);
209 if (!tree_promote_at(u->t, b, m->coord)) {
210 if (UDEBUGL(0))
211 fprintf(stderr, "Warning: Cannot promote move node! Several play commands in row?\n");
212 reset_state(u);
213 return NULL;
216 /* If we are a slave in a distributed engine, start pondering once
217 * we know which move we actually played. See uct_genmove() about
218 * the check for pass. */
219 if (u->pondering_opt && u->slave && m->color == u->my_color && !is_pass(m->coord))
220 uct_pondering_start(u, b, u->t, stone_other(m->color));
222 return NULL;
225 static char *
226 uct_chat(struct engine *e, struct board *b, char *cmd)
228 struct uct *u = e->data;
229 static char reply[1024];
231 cmd += strspn(cmd, " \n\t");
232 if (!strncasecmp(cmd, "winrate", 7)) {
233 if (!u->t)
234 return "no game context (yet?)";
235 enum stone color = u->t->root_color;
236 struct tree_node *n = u->t->root;
237 snprintf(reply, 1024, "In %d playouts at %d threads, %s %s can win with %.2f%% probability",
238 n->u.playouts, u->threads, stone2str(color), coord2sstr(n->coord, b),
239 tree_node_get_value(u->t, -1, n->u.value) * 100);
240 if (u->t->use_extra_komi && abs(u->t->extra_komi) >= 0.5) {
241 sprintf(reply + strlen(reply), ", while self-imposing extra komi %.1f",
242 u->t->extra_komi);
244 strcat(reply, ".");
245 return reply;
247 return NULL;
250 static void
251 uct_dead_group_list(struct engine *e, struct board *b, struct move_queue *mq)
253 struct uct *u = e->data;
255 /* This means the game is probably over, no use pondering on. */
256 uct_pondering_stop(u);
258 if (u->pass_all_alive)
259 return; // no dead groups
261 bool mock_state = false;
263 if (!u->t) {
264 /* No state, but we cannot just back out - we might
265 * have passed earlier, only assuming some stones are
266 * dead, and then re-connected, only to lose counting
267 * when all stones are assumed alive. */
268 /* Mock up some state and seed the ownermap by few
269 * simulations. */
270 uct_prepare_move(u, b, S_BLACK); assert(u->t);
271 for (int i = 0; i < GJ_MINGAMES; i++)
272 uct_playout(u, b, S_BLACK, u->t);
273 mock_state = true;
276 dead_group_list(u, b, mq);
278 if (mock_state) {
279 /* Clean up the mock state in case we will receive
280 * a genmove; we could get a non-alternating-move
281 * error from uct_prepare_move() in that case otherwise. */
282 reset_state(u);
286 static void
287 playout_policy_done(struct playout_policy *p)
289 if (p->done) p->done(p);
290 if (p->data) free(p->data);
291 free(p);
294 static void
295 uct_done(struct engine *e)
297 /* This is called on engine reset, especially when clear_board
298 * is received and new game should begin. */
299 struct uct *u = e->data;
300 uct_pondering_stop(u);
301 if (u->t) reset_state(u);
302 free(u->ownermap.map);
303 free(u->stats);
305 free(u->policy);
306 free(u->random_policy);
307 playout_policy_done(u->playout);
308 uct_prior_done(u->prior);
312 /* Pachi threading structure (if uct_playouts_parallel() is used):
314 * main thread
315 * | main(), GTP communication, ...
316 * | starts and stops the search managed by thread_manager
318 * thread_manager
319 * | spawns and collects worker threads
321 * worker0
322 * worker1
323 * ...
324 * workerK
325 * uct_playouts() loop, doing descend-playout until uct_halt
327 * Another way to look at it is by functions (lines denote thread boundaries):
329 * | uct_genmove()
330 * | uct_search() (uct_search_start() .. uct_search_stop())
331 * | -----------------------
332 * | spawn_thread_manager()
333 * | -----------------------
334 * | spawn_worker()
335 * V uct_playouts() */
337 /* Set in thread manager in case the workers should stop. */
338 volatile sig_atomic_t uct_halt = 0;
339 /* ID of the running worker thread. */
340 __thread int thread_id = -1;
341 /* ID of the thread manager. */
342 static pthread_t thread_manager;
343 bool thread_manager_running;
345 static pthread_mutex_t finish_mutex = PTHREAD_MUTEX_INITIALIZER;
346 static pthread_cond_t finish_cond = PTHREAD_COND_INITIALIZER;
347 static volatile int finish_thread;
348 static pthread_mutex_t finish_serializer = PTHREAD_MUTEX_INITIALIZER;
350 struct spawn_ctx {
351 int tid;
352 struct uct *u;
353 struct board *b;
354 enum stone color;
355 struct tree *t;
356 unsigned long seed;
357 int games;
360 static void *
361 spawn_worker(void *ctx_)
363 struct spawn_ctx *ctx = ctx_;
364 /* Setup */
365 fast_srandom(ctx->seed);
366 thread_id = ctx->tid;
367 /* Run */
368 ctx->games = uct_playouts(ctx->u, ctx->b, ctx->color, ctx->t);
369 /* Finish */
370 pthread_mutex_lock(&finish_serializer);
371 pthread_mutex_lock(&finish_mutex);
372 finish_thread = ctx->tid;
373 pthread_cond_signal(&finish_cond);
374 pthread_mutex_unlock(&finish_mutex);
375 return ctx;
378 /* Thread manager, controlling worker threads. It must be called with
379 * finish_mutex lock held, but it will unlock it itself before exiting;
380 * this is necessary to be completely deadlock-free. */
381 /* The finish_cond can be signalled for it to stop; in that case,
382 * the caller should set finish_thread = -1. */
383 /* After it is started, it will update mctx->t to point at some tree
384 * used for the actual search (matters only for TM_ROOT), on return
385 * it will set mctx->games to the number of performed simulations. */
386 static void *
387 spawn_thread_manager(void *ctx_)
389 /* In thread_manager, we use only some of the ctx fields. */
390 struct spawn_ctx *mctx = ctx_;
391 struct uct *u = mctx->u;
392 struct tree *t = mctx->t;
393 bool shared_tree = u->parallel_tree;
394 fast_srandom(mctx->seed);
396 int played_games = 0;
397 pthread_t threads[u->threads];
398 int joined = 0;
400 uct_halt = 0;
402 /* Garbage collect the tree by preference when pondering. */
403 if (u->pondering && t->nodes && t->nodes_size > t->max_tree_size/2) {
404 unsigned long temp_size = (MIN_FREE_MEM_PERCENT * t->max_tree_size) / 100;
405 t->root = tree_garbage_collect(t, temp_size, t->root);
408 /* Spawn threads... */
409 for (int ti = 0; ti < u->threads; ti++) {
410 struct spawn_ctx *ctx = malloc2(sizeof(*ctx));
411 ctx->u = u; ctx->b = mctx->b; ctx->color = mctx->color;
412 mctx->t = ctx->t = shared_tree ? t : tree_copy(t);
413 ctx->tid = ti; ctx->seed = fast_random(65536) + ti;
414 pthread_create(&threads[ti], NULL, spawn_worker, ctx);
415 if (UDEBUGL(3))
416 fprintf(stderr, "Spawned worker %d\n", ti);
419 /* ...and collect them back: */
420 while (joined < u->threads) {
421 /* Wait for some thread to finish... */
422 pthread_cond_wait(&finish_cond, &finish_mutex);
423 if (finish_thread < 0) {
424 /* Stop-by-caller. Tell the workers to wrap up. */
425 uct_halt = 1;
426 continue;
428 /* ...and gather its remnants. */
429 struct spawn_ctx *ctx;
430 pthread_join(threads[finish_thread], (void **) &ctx);
431 played_games += ctx->games;
432 joined++;
433 if (!shared_tree) {
434 if (ctx->t == mctx->t) mctx->t = t;
435 tree_merge(t, ctx->t);
436 tree_done(ctx->t);
438 free(ctx);
439 if (UDEBUGL(3))
440 fprintf(stderr, "Joined worker %d\n", finish_thread);
441 pthread_mutex_unlock(&finish_serializer);
444 pthread_mutex_unlock(&finish_mutex);
446 if (!shared_tree)
447 tree_normalize(mctx->t, u->threads);
449 mctx->games = played_games;
450 return mctx;
454 /* Progress information of the on-going MCTS search - when did we
455 * last adjusted dynkomi, printed out stuff, etc. */
456 struct uct_search_state {
457 /* Number of last dynkomi adjustment. */
458 int last_dynkomi;
459 /* Number of last game with progress print. */
460 int last_print;
461 /* Number of simulations to wait before next print. */
462 int print_interval;
463 /* Printed notification about full memory? */
464 bool print_fullmem;
466 struct time_stop stop;
467 struct spawn_ctx ctx;
470 static void
471 uct_search_start(struct uct *u, struct board *b, enum stone color,
472 struct tree *t, struct time_info *ti,
473 struct uct_search_state *s)
475 /* Set up search state. */
476 s->last_dynkomi = t->root->u.playouts;
477 s->last_print = t->root->u.playouts;
478 s->print_interval = TREE_SIMPROGRESS_INTERVAL * (u->thread_model == TM_ROOT ? 1 : u->threads);
479 s->print_fullmem = false;
481 if (ti) {
482 if (ti->period == TT_NULL) *ti = default_ti;
483 time_stop_conditions(ti, b, u->fuseki_end, u->yose_start, &s->stop);
486 /* Fire up the tree search thread manager, which will in turn
487 * spawn the searching threads. */
488 assert(u->threads > 0);
489 assert(!thread_manager_running);
490 s->ctx = (struct spawn_ctx) { .u = u, .b = b, .color = color, .t = t, .seed = fast_random(65536) };
491 pthread_mutex_lock(&finish_mutex);
492 pthread_create(&thread_manager, NULL, spawn_thread_manager, &s->ctx);
493 thread_manager_running = true;
496 static struct spawn_ctx *
497 uct_search_stop(void)
499 assert(thread_manager_running);
501 /* Signal thread manager to stop the workers. */
502 pthread_mutex_lock(&finish_mutex);
503 finish_thread = -1;
504 pthread_cond_signal(&finish_cond);
505 pthread_mutex_unlock(&finish_mutex);
507 /* Collect the thread manager. */
508 struct spawn_ctx *pctx;
509 thread_manager_running = false;
510 pthread_join(thread_manager, (void **) &pctx);
511 return pctx;
515 /* Determine whether we should terminate the search early. */
516 static bool
517 uct_search_stop_early(struct uct *u, struct tree *t, struct board *b,
518 struct time_info *ti, struct time_stop *stop,
519 struct tree_node *best, struct tree_node *best2,
520 int played)
522 /* Always use at least half the desired time. It is silly
523 * to lose a won game because we played a bad move in 0.1s. */
524 double elapsed = 0;
525 if (ti->dim == TD_WALLTIME) {
526 elapsed = time_now() - ti->len.t.timer_start;
527 if (elapsed < 0.5 * stop->desired.time) return false;
530 /* Early break in won situation. */
531 if (best->u.playouts >= 2000 && tree_node_get_value(t, 1, best->u.value) >= u->loss_threshold)
532 return true;
533 /* Earlier break in super-won situation. */
534 if (best->u.playouts >= 500 && tree_node_get_value(t, 1, best->u.value) >= 0.95)
535 return true;
537 /* Break early if we estimate the second-best move cannot
538 * catch up in assigned time anymore. We use all our time
539 * if we are in byoyomi with single stone remaining in our
540 * period, however - it's better to pre-ponder. */
541 bool time_indulgent = (!ti->len.t.main_time && ti->len.t.byoyomi_stones == 1);
542 if (best2 && ti->dim == TD_WALLTIME && !time_indulgent) {
543 double remaining = stop->worst.time - elapsed;
544 double pps = ((double)played) / elapsed;
545 double estplayouts = remaining * pps + PLAYOUT_DELTA_SAFEMARGIN;
546 if (best->u.playouts > best2->u.playouts + estplayouts) {
547 if (UDEBUGL(2))
548 fprintf(stderr, "Early stop, result cannot change: "
549 "best %d, best2 %d, estimated %f simulations to go\n",
550 best->u.playouts, best2->u.playouts, estplayouts);
551 return true;
555 return false;
558 /* Determine whether we should terminate the search later than expected. */
559 static bool
560 uct_search_keep_looking(struct uct *u, struct tree *t, struct board *b,
561 struct time_info *ti, struct time_stop *stop,
562 struct tree_node *best, struct tree_node *best2,
563 struct tree_node *bestr, struct tree_node *winner, int i)
565 if (!best) {
566 if (UDEBUGL(2))
567 fprintf(stderr, "Did not find best move, still trying...\n");
568 return true;
571 /* Do not waste time if we are winning. Spend up to worst time if
572 * we are unsure, but only desired time if we are sure of winning. */
573 float beta = 2 * (tree_node_get_value(t, 1, best->u.value) - 0.5);
574 if (ti->dim == TD_WALLTIME && beta > 0) {
575 double good_enough = stop->desired.time * beta + stop->worst.time * (1 - beta);
576 double elapsed = time_now() - ti->len.t.timer_start;
577 if (elapsed > good_enough) return false;
580 if (u->best2_ratio > 0) {
581 /* Check best/best2 simulations ratio. If the
582 * two best moves give very similar results,
583 * keep simulating. */
584 if (best2 && best2->u.playouts
585 && (double)best->u.playouts / best2->u.playouts < u->best2_ratio) {
586 if (UDEBUGL(2))
587 fprintf(stderr, "Best2 ratio %f < threshold %f\n",
588 (double)best->u.playouts / best2->u.playouts,
589 u->best2_ratio);
590 return true;
594 if (u->bestr_ratio > 0) {
595 /* Check best, best_best value difference. If the best move
596 * and its best child do not give similar enough results,
597 * keep simulating. */
598 if (bestr && bestr->u.playouts
599 && fabs((double)best->u.value - bestr->u.value) > u->bestr_ratio) {
600 if (UDEBUGL(2))
601 fprintf(stderr, "Bestr delta %f > threshold %f\n",
602 fabs((double)best->u.value - bestr->u.value),
603 u->bestr_ratio);
604 return true;
608 if (winner && winner != best) {
609 /* Keep simulating if best explored
610 * does not have also highest value. */
611 if (UDEBUGL(2))
612 fprintf(stderr, "[%d] best %3s [%d] %f != winner %3s [%d] %f\n", i,
613 coord2sstr(best->coord, t->board),
614 best->u.playouts, tree_node_get_value(t, 1, best->u.value),
615 coord2sstr(winner->coord, t->board),
616 winner->u.playouts, tree_node_get_value(t, 1, winner->u.value));
617 return true;
620 /* No reason to keep simulating, bye. */
621 return false;
624 /* Run time-limited MCTS search. For a slave in the distributed
625 * engine, the search is done in background and will be stopped at
626 * the next uct_notify_play(); keep_looking is advice for the master. */
628 uct_search(struct uct *u, struct board *b, struct time_info *ti, enum stone color,
629 struct tree *t, bool *keep_looking)
631 int base_playouts = u->t->root->u.playouts;
632 if (UDEBUGL(2) && base_playouts > 0)
633 fprintf(stderr, "<pre-simulated %d games skipped>\n", base_playouts);
635 *keep_looking = false;
637 struct uct_search_state s;
638 if (!thread_manager_running) {
639 uct_search_start(u, b, color, t, ti, &s);
640 } else {
641 /* Keep the search running. */
642 assert(u->slave);
644 struct spawn_ctx *ctx = &s.ctx;
646 /* The search tree is ctx->t. This is normally == t, but in case of
647 * TM_ROOT, it is one of the trees belonging to the independent
648 * workers. It is important to reference ctx->t directly since the
649 * thread manager will swap the tree pointer asynchronously. */
650 /* XXX: This means TM_ROOT support is suboptimal since single stalled
651 * thread can stall the others in case of limiting the search by game
652 * count. However, TM_ROOT just does not deserve any more extra code
653 * right now. */
655 /* Now, just periodically poll the search tree. */
656 while (1) {
657 time_sleep(TREE_BUSYWAIT_INTERVAL);
658 /* TREE_BUSYWAIT_INTERVAL should never be less than desired time, or the
659 * time control is broken. But if it happens to be less, we still search
660 * at least 100ms otherwise the move is completely random. */
662 int i = ctx->t->root->u.playouts;
664 /* Adjust dynkomi? */
665 if (ctx->t->use_extra_komi && u->dynkomi->permove
666 && u->dynkomi_interval
667 && i > s.last_dynkomi + u->dynkomi_interval) {
668 s.last_dynkomi += u->dynkomi_interval;
669 float old_dynkomi = ctx->t->extra_komi;
670 ctx->t->extra_komi = u->dynkomi->permove(u->dynkomi, b, ctx->t);
671 if (UDEBUGL(3) && old_dynkomi != ctx->t->extra_komi)
672 fprintf(stderr, "dynkomi adjusted (%f -> %f)\n", old_dynkomi, ctx->t->extra_komi);
675 /* Print progress? */
676 if (i - s.last_print > s.print_interval) {
677 s.last_print += s.print_interval; // keep the numbers tidy
678 uct_progress_status(u, ctx->t, color, s.last_print);
680 if (!s.print_fullmem && ctx->t->nodes_size > u->max_tree_size) {
681 if (UDEBUGL(2))
682 fprintf(stderr, "memory limit hit (%lu > %lu)\n", ctx->t->nodes_size, u->max_tree_size);
683 s.print_fullmem = true;
686 /* Never consider stopping if we played too few simulations.
687 * Maybe we risk losing on time when playing in super-extreme
688 * time pressure but the tree is going to be just too messed
689 * up otherwise - we might even play invalid suicides or pass
690 * when we mustn't. */
691 if (i < GJ_MINGAMES)
692 continue;
694 struct tree_node *best = NULL;
695 struct tree_node *best2 = NULL; // Second-best move.
696 struct tree_node *bestr = NULL; // best's best child.
697 struct tree_node *winner = NULL;
699 best = u->policy->choose(u->policy, ctx->t->root, b, color, resign);
700 if (best) best2 = u->policy->choose(u->policy, ctx->t->root, b, color, best->coord);
702 /* Possibly stop search early if it's no use to try on. */
703 int played = u->played_all + i - base_playouts;
704 if (best && uct_search_stop_early(u, ctx->t, b, ti, &s.stop, best, best2, played))
705 break;
707 /* Check against time settings. */
708 bool desired_done;
709 if (ti->dim == TD_WALLTIME) {
710 double elapsed = time_now() - ti->len.t.timer_start;
711 if (elapsed > s.stop.worst.time) break;
712 desired_done = elapsed > s.stop.desired.time;
714 } else { assert(ti->dim == TD_GAMES);
715 if (i > s.stop.worst.playouts) break;
716 desired_done = i > s.stop.desired.playouts;
719 /* We want to stop simulating, but are willing to keep trying
720 * if we aren't completely sure about the winner yet. */
721 if (desired_done) {
722 if (u->policy->winner && u->policy->evaluate) {
723 struct uct_descent descent = { .node = ctx->t->root };
724 u->policy->winner(u->policy, ctx->t, &descent);
725 winner = descent.node;
727 if (best)
728 bestr = u->policy->choose(u->policy, best, b, stone_other(color), resign);
729 if (!uct_search_keep_looking(u, ctx->t, b, ti, &s.stop, best, best2, bestr, winner, i))
730 break;
733 /* TODO: Early break if best->variance goes under threshold and we already
734 * have enough playouts (possibly thanks to book or to pondering)? */
736 /* If running as slave in the distributed engine,
737 * let the search continue in background. */
738 if (u->slave) {
739 *keep_looking = true;
740 break;
744 int games;
745 if (!u->slave) {
746 ctx = uct_search_stop();
747 games = ctx->games;
748 if (UDEBUGL(2)) tree_dump(t, u->dumpthres);
749 } else {
750 /* We can only return an estimate here. */
751 games = ctx->t->root->u.playouts - base_playouts;
753 if (UDEBUGL(2))
754 fprintf(stderr, "(avg score %f/%d value %f/%d)\n",
755 u->dynkomi->score.value, u->dynkomi->score.playouts,
756 u->dynkomi->value.value, u->dynkomi->value.playouts);
757 if (UDEBUGL(0))
758 uct_progress_status(u, t, color, games);
760 u->played_own += games;
761 return games;
765 /* Start pondering background with @color to play. */
766 static void
767 uct_pondering_start(struct uct *u, struct board *b0, struct tree *t, enum stone color)
769 if (UDEBUGL(1))
770 fprintf(stderr, "Starting to ponder with color %s\n", stone2str(stone_other(color)));
771 u->pondering = true;
773 /* We need a local board copy to ponder upon. */
774 struct board *b = malloc2(sizeof(*b)); board_copy(b, b0);
776 /* *b0 did not have the genmove'd move played yet. */
777 struct move m = { t->root->coord, t->root_color };
778 int res = board_play(b, &m);
779 assert(res >= 0);
780 setup_dynkomi(u, b, stone_other(m.color));
782 /* Start MCTS manager thread "headless". */
783 static struct uct_search_state s;
784 uct_search_start(u, b, color, t, NULL, &s);
787 /* uct_search_stop() frontend for the pondering (non-genmove) mode, and
788 * to stop the background search for a slave in the distributed engine. */
789 static void
790 uct_pondering_stop(struct uct *u)
792 if (!thread_manager_running)
793 return;
795 /* Stop the thread manager. */
796 struct spawn_ctx *ctx = uct_search_stop();
797 if (UDEBUGL(1)) {
798 if (u->pondering) fprintf(stderr, "(pondering) ");
799 uct_progress_status(u, ctx->t, ctx->color, ctx->games);
801 if (u->pondering) {
802 free(ctx->b);
803 u->pondering = false;
807 void
808 uct_search_setup(struct uct *u, struct board *b, enum stone color)
810 if (b->superko_violation) {
811 fprintf(stderr, "!!! WARNING: SUPERKO VIOLATION OCCURED BEFORE THIS MOVE\n");
812 fprintf(stderr, "Maybe you play with situational instead of positional superko?\n");
813 fprintf(stderr, "I'm going to ignore the violation, but note that I may miss\n");
814 fprintf(stderr, "some moves valid under this ruleset because of this.\n");
815 b->superko_violation = false;
818 uct_prepare_move(u, b, color);
820 assert(u->t);
821 u->my_color = color;
823 /* How to decide whether to use dynkomi in this game? Since we use
824 * pondering, it's not simple "who-to-play" matter. Decide based on
825 * the last genmove issued. */
826 u->t->use_extra_komi = !!(u->dynkomi_mask & color);
827 setup_dynkomi(u, b, color);
829 if (b->rules == RULES_JAPANESE)
830 u->territory_scoring = true;
832 /* Make pessimistic assumption about komi for Japanese rules to
833 * avoid losing by 0.5 when winning by 0.5 with Chinese rules.
834 * The rules usually give the same winner if the integer part of komi
835 * is odd so we adjust the komi only if it is even (for a board of
836 * odd size). We are not trying to get an exact evaluation for rare
837 * cases of seki. For details see http://home.snafu.de/jasiek/parity.html */
838 if (u->territory_scoring && (((int)floor(b->komi) + board_size(b)) & 1)) {
839 b->komi += (color == S_BLACK ? 1.0 : -1.0);
840 if (UDEBUGL(0))
841 fprintf(stderr, "Setting komi to %.1f assuming Japanese rules\n",
842 b->komi);
846 struct tree_node *
847 uct_search_best(struct uct *u, struct board *b, enum stone color,
848 bool pass_all_alive, int played_games, int base_playouts,
849 coord_t *best_coord)
851 /* Choose the best move from the tree. */
852 struct tree_node *best = u->policy->choose(u->policy, u->t->root, b, color, resign);
853 if (!best) {
854 *best_coord = pass;
855 return NULL;
857 *best_coord = best->coord;
858 if (UDEBUGL(1))
859 fprintf(stderr, "*** WINNER is %s (%d,%d) with score %1.4f (%d/%d:%d/%d games), extra komi %f\n",
860 coord2sstr(best->coord, b), coord_x(best->coord, b), coord_y(best->coord, b),
861 tree_node_get_value(u->t, 1, best->u.value), best->u.playouts,
862 u->t->root->u.playouts, u->t->root->u.playouts - base_playouts, played_games,
863 u->t->extra_komi);
865 /* Do not resign if we're so short of time that evaluation of best
866 * move is completely unreliable, we might be winning actually.
867 * In this case best is almost random but still better than resign.
868 * Also do not resign if we are getting bad results while actually
869 * giving away extra komi points (dynkomi). */
870 if (tree_node_get_value(u->t, 1, best->u.value) < u->resign_ratio
871 && !is_pass(best->coord) && best->u.playouts > GJ_MINGAMES
872 && u->t->extra_komi < 0.5 /* XXX we assume dynamic komi == we are black */) {
873 *best_coord = resign;
874 return NULL;
877 /* If the opponent just passed and we win counting, always
878 * pass as well. */
879 if (b->moves > 1 && is_pass(b->last_move.coord)) {
880 /* Make sure enough playouts are simulated. */
881 while (u->ownermap.playouts < GJ_MINGAMES)
882 uct_playout(u, b, color, u->t);
883 if (uct_pass_is_safe(u, b, color, u->pass_all_alive || pass_all_alive)) {
884 if (UDEBUGL(0))
885 fprintf(stderr, "<Will rather pass, looks safe enough; score %f>\n",
886 board_official_score(b, NULL) / 2);
887 *best_coord = pass;
888 return NULL;
892 return best;
895 static coord_t *
896 uct_genmove(struct engine *e, struct board *b, struct time_info *ti, enum stone color, bool pass_all_alive)
898 double start_time = time_now();
899 struct uct *u = e->data;
900 uct_pondering_stop(u);
901 uct_search_setup(u, b, color);
903 /* Start the Monte Carlo Tree Search! */
904 bool keep_looking;
905 int base_playouts = u->t->root->u.playouts;
906 int played_games = uct_search(u, b, ti, color, u->t, &keep_looking);
908 coord_t best_coord;
909 struct tree_node *best;
910 best = uct_search_best(u, b, color, pass_all_alive, played_games, base_playouts, &best_coord);
912 if (UDEBUGL(2)) {
913 double time = time_now() - start_time + 0.000001; /* avoid divide by zero */
914 fprintf(stderr, "genmove in %0.2fs (%d games/s, %d games/s/thread)\n",
915 time, (int)(played_games/time), (int)(played_games/time/u->threads));
918 if (!best) {
919 reset_state(u);
920 return coord_copy(best_coord);
922 tree_promote_node(u->t, &best);
924 /* After a pass, pondering is harmful for two reasons:
925 * (i) We might keep pondering even when the game is over.
926 * Of course this is the case for opponent resign as well.
927 * (ii) More importantly, the ownermap will get skewed since
928 * the UCT will start cutting off any playouts. */
929 if (u->pondering_opt && !is_pass(best->coord)) {
930 uct_pondering_start(u, b, u->t, stone_other(color));
932 return coord_copy(best_coord);
936 bool
937 uct_genbook(struct engine *e, struct board *b, struct time_info *ti, enum stone color)
939 struct uct *u = e->data;
940 if (!u->t) uct_prepare_move(u, b, color);
941 assert(u->t);
943 if (ti->dim == TD_GAMES) {
944 /* Don't count in games that already went into the book. */
945 ti->len.games += u->t->root->u.playouts;
947 bool keep_looking;
948 uct_search(u, b, ti, color, u->t, &keep_looking);
950 assert(ti->dim == TD_GAMES);
951 tree_save(u->t, b, ti->len.games / 100);
953 return true;
956 void
957 uct_dumpbook(struct engine *e, struct board *b, enum stone color)
959 struct uct *u = e->data;
960 struct tree *t = tree_init(b, color, u->fast_alloc ? u->max_tree_size : 0, u->local_tree_aging);
961 tree_load(t, b);
962 tree_dump(t, 0);
963 tree_done(t);
967 struct uct *
968 uct_state_init(char *arg, struct board *b)
970 struct uct *u = calloc2(1, sizeof(struct uct));
971 bool using_elo = false;
973 u->debug_level = debug_level;
974 u->gamelen = MC_GAMELEN;
975 u->mercymin = 0;
976 u->expand_p = 2;
977 u->dumpthres = 1000;
978 u->playout_amaf = true;
979 u->playout_amaf_nakade = false;
980 u->amaf_prior = false;
981 u->max_tree_size = 3072ULL * 1048576;
983 u->dynkomi_mask = S_BLACK;
985 u->threads = 1;
986 u->thread_model = TM_TREEVL;
987 u->parallel_tree = true;
988 u->virtual_loss = true;
990 u->fuseki_end = 20; // max time at 361*20% = 72 moves (our 36th move, still 99 to play)
991 u->yose_start = 40; // (100-40-25)*361/100/2 = 63 moves still to play by us then
992 u->bestr_ratio = 0.02;
993 // 2.5 is clearly too much, but seems to compensate well for overly stern time allocations.
994 // TODO: Further tuning and experiments with better time allocation schemes.
995 u->best2_ratio = 2.5;
997 u->val_scale = 0.04; u->val_points = 40;
999 u->tenuki_d = 4;
1000 u->local_tree_aging = 2;
1002 if (arg) {
1003 char *optspec, *next = arg;
1004 while (*next) {
1005 optspec = next;
1006 next += strcspn(next, ",");
1007 if (*next) { *next++ = 0; } else { *next = 0; }
1009 char *optname = optspec;
1010 char *optval = strchr(optspec, '=');
1011 if (optval) *optval++ = 0;
1013 if (!strcasecmp(optname, "debug")) {
1014 if (optval)
1015 u->debug_level = atoi(optval);
1016 else
1017 u->debug_level++;
1018 } else if (!strcasecmp(optname, "mercy") && optval) {
1019 /* Minimal difference of black/white captures
1020 * to stop playout - "Mercy Rule". Speeds up
1021 * hopeless playouts at the expense of some
1022 * accuracy. */
1023 u->mercymin = atoi(optval);
1024 } else if (!strcasecmp(optname, "gamelen") && optval) {
1025 u->gamelen = atoi(optval);
1026 } else if (!strcasecmp(optname, "expand_p") && optval) {
1027 u->expand_p = atoi(optval);
1028 } else if (!strcasecmp(optname, "dumpthres") && optval) {
1029 u->dumpthres = atoi(optval);
1030 } else if (!strcasecmp(optname, "best2_ratio") && optval) {
1031 /* If set, prolong simulating while
1032 * first_best/second_best playouts ratio
1033 * is less than best2_ratio. */
1034 u->best2_ratio = atof(optval);
1035 } else if (!strcasecmp(optname, "bestr_ratio") && optval) {
1036 /* If set, prolong simulating while
1037 * best,best_best_child values delta
1038 * is more than bestr_ratio. */
1039 u->bestr_ratio = atof(optval);
1040 } else if (!strcasecmp(optname, "playout_amaf")) {
1041 /* Whether to include random playout moves in
1042 * AMAF as well. (Otherwise, only tree moves
1043 * are included in AMAF. Of course makes sense
1044 * only in connection with an AMAF policy.) */
1045 /* with-without: 55.5% (+-4.1) */
1046 if (optval && *optval == '0')
1047 u->playout_amaf = false;
1048 else
1049 u->playout_amaf = true;
1050 } else if (!strcasecmp(optname, "playout_amaf_nakade")) {
1051 /* Whether to include nakade moves from playouts
1052 * in the AMAF statistics; this tends to nullify
1053 * the playout_amaf effect by adding too much
1054 * noise. */
1055 if (optval && *optval == '0')
1056 u->playout_amaf_nakade = false;
1057 else
1058 u->playout_amaf_nakade = true;
1059 } else if (!strcasecmp(optname, "playout_amaf_cutoff") && optval) {
1060 /* Keep only first N% of playout stage AMAF
1061 * information. */
1062 u->playout_amaf_cutoff = atoi(optval);
1063 } else if ((!strcasecmp(optname, "policy") || !strcasecmp(optname, "random_policy")) && optval) {
1064 char *policyarg = strchr(optval, ':');
1065 struct uct_policy **p = !strcasecmp(optname, "policy") ? &u->policy : &u->random_policy;
1066 if (policyarg)
1067 *policyarg++ = 0;
1068 if (!strcasecmp(optval, "ucb1")) {
1069 *p = policy_ucb1_init(u, policyarg);
1070 } else if (!strcasecmp(optval, "ucb1amaf")) {
1071 *p = policy_ucb1amaf_init(u, policyarg);
1072 } else {
1073 fprintf(stderr, "UCT: Invalid tree policy %s\n", optval);
1074 exit(1);
1076 } else if (!strcasecmp(optname, "playout") && optval) {
1077 char *playoutarg = strchr(optval, ':');
1078 if (playoutarg)
1079 *playoutarg++ = 0;
1080 if (!strcasecmp(optval, "moggy")) {
1081 u->playout = playout_moggy_init(playoutarg, b);
1082 } else if (!strcasecmp(optval, "light")) {
1083 u->playout = playout_light_init(playoutarg, b);
1084 } else if (!strcasecmp(optval, "elo")) {
1085 u->playout = playout_elo_init(playoutarg, b);
1086 using_elo = true;
1087 } else {
1088 fprintf(stderr, "UCT: Invalid playout policy %s\n", optval);
1089 exit(1);
1091 } else if (!strcasecmp(optname, "prior") && optval) {
1092 u->prior = uct_prior_init(optval, b);
1093 } else if (!strcasecmp(optname, "amaf_prior") && optval) {
1094 u->amaf_prior = atoi(optval);
1095 } else if (!strcasecmp(optname, "threads") && optval) {
1096 /* By default, Pachi will run with only single
1097 * tree search thread! */
1098 u->threads = atoi(optval);
1099 } else if (!strcasecmp(optname, "thread_model") && optval) {
1100 if (!strcasecmp(optval, "root")) {
1101 /* Root parallelization - each thread
1102 * does independent search, trees are
1103 * merged at the end. */
1104 u->thread_model = TM_ROOT;
1105 u->parallel_tree = false;
1106 u->virtual_loss = false;
1107 } else if (!strcasecmp(optval, "tree")) {
1108 /* Tree parallelization - all threads
1109 * grind on the same tree. */
1110 u->thread_model = TM_TREE;
1111 u->parallel_tree = true;
1112 u->virtual_loss = false;
1113 } else if (!strcasecmp(optval, "treevl")) {
1114 /* Tree parallelization, but also
1115 * with virtual losses - this discou-
1116 * rages most threads choosing the
1117 * same tree branches to read. */
1118 u->thread_model = TM_TREEVL;
1119 u->parallel_tree = true;
1120 u->virtual_loss = true;
1121 } else {
1122 fprintf(stderr, "UCT: Invalid thread model %s\n", optval);
1123 exit(1);
1125 } else if (!strcasecmp(optname, "pondering")) {
1126 /* Keep searching even during opponent's turn. */
1127 u->pondering_opt = !optval || atoi(optval);
1128 } else if (!strcasecmp(optname, "fuseki_end") && optval) {
1129 /* At the very beginning it's not worth thinking
1130 * too long because the playout evaluations are
1131 * very noisy. So gradually increase the thinking
1132 * time up to maximum when fuseki_end percent
1133 * of the board has been played.
1134 * This only applies if we are not in byoyomi. */
1135 u->fuseki_end = atoi(optval);
1136 } else if (!strcasecmp(optname, "yose_start") && optval) {
1137 /* When yose_start percent of the board has been
1138 * played, or if we are in byoyomi, stop spending
1139 * more time and spread the remaining time
1140 * uniformly.
1141 * Between fuseki_end and yose_start, we spend
1142 * a constant proportion of the remaining time
1143 * on each move. (yose_start should actually
1144 * be much earlier than when real yose start,
1145 * but "yose" is a good short name to convey
1146 * the idea.) */
1147 u->yose_start = atoi(optval);
1148 } else if (!strcasecmp(optname, "force_seed") && optval) {
1149 u->force_seed = atoi(optval);
1150 } else if (!strcasecmp(optname, "no_book")) {
1151 u->no_book = true;
1152 } else if (!strcasecmp(optname, "dynkomi") && optval) {
1153 /* Dynamic komi approach; there are multiple
1154 * ways to adjust komi dynamically throughout
1155 * play. We currently support two: */
1156 char *dynkomiarg = strchr(optval, ':');
1157 if (dynkomiarg)
1158 *dynkomiarg++ = 0;
1159 if (!strcasecmp(optval, "none")) {
1160 u->dynkomi = uct_dynkomi_init_none(u, dynkomiarg, b);
1161 } else if (!strcasecmp(optval, "linear")) {
1162 u->dynkomi = uct_dynkomi_init_linear(u, dynkomiarg, b);
1163 } else if (!strcasecmp(optval, "adaptive")) {
1164 u->dynkomi = uct_dynkomi_init_adaptive(u, dynkomiarg, b);
1165 } else {
1166 fprintf(stderr, "UCT: Invalid dynkomi mode %s\n", optval);
1167 exit(1);
1169 } else if (!strcasecmp(optname, "dynkomi_mask") && optval) {
1170 /* Bitmask of colors the player must be
1171 * for dynkomi be applied; you may want
1172 * to use dynkomi_mask=3 to allow dynkomi
1173 * even in games where Pachi is white. */
1174 u->dynkomi_mask = atoi(optval);
1175 } else if (!strcasecmp(optname, "dynkomi_interval") && optval) {
1176 /* If non-zero, re-adjust dynamic komi
1177 * throughout a single genmove reading,
1178 * roughly every N simulations. */
1179 /* XXX: Does not work with tree
1180 * parallelization. */
1181 u->dynkomi_interval = atoi(optval);
1182 } else if (!strcasecmp(optname, "val_scale") && optval) {
1183 /* How much of the game result value should be
1184 * influenced by win size. Zero means it isn't. */
1185 u->val_scale = atof(optval);
1186 } else if (!strcasecmp(optname, "val_points") && optval) {
1187 /* Maximum size of win to be scaled into game
1188 * result value. Zero means boardsize^2. */
1189 u->val_points = atoi(optval) * 2; // result values are doubled
1190 } else if (!strcasecmp(optname, "val_extra")) {
1191 /* If false, the score coefficient will be simply
1192 * added to the value, instead of scaling the result
1193 * coefficient because of it. */
1194 u->val_extra = !optval || atoi(optval);
1195 } else if (!strcasecmp(optname, "local_tree") && optval) {
1196 /* Whether to bias exploration by local tree values
1197 * (must be supported by the used policy).
1198 * 0: Don't.
1199 * 1: Do, value = result.
1200 * Try to temper the result:
1201 * 2: Do, value = 0.5+(result-expected)/2.
1202 * 3: Do, value = 0.5+bzz((result-expected)^2).
1203 * 4: Do, value = 0.5+sqrt(result-expected)/2. */
1204 u->local_tree = atoi(optval);
1205 } else if (!strcasecmp(optname, "tenuki_d") && optval) {
1206 /* Tenuki distance at which to break the local tree. */
1207 u->tenuki_d = atoi(optval);
1208 if (u->tenuki_d > TREE_NODE_D_MAX + 1) {
1209 fprintf(stderr, "uct: tenuki_d must not be larger than TREE_NODE_D_MAX+1 %d\n", TREE_NODE_D_MAX + 1);
1210 exit(1);
1212 } else if (!strcasecmp(optname, "local_tree_aging") && optval) {
1213 /* How much to reduce local tree values between moves. */
1214 u->local_tree_aging = atof(optval);
1215 } else if (!strcasecmp(optname, "local_tree_allseq")) {
1216 /* By default, only complete sequences are stored
1217 * in the local tree. If this is on, also
1218 * subsequences starting at each move are stored. */
1219 u->local_tree_allseq = !optval || atoi(optval);
1220 } else if (!strcasecmp(optname, "local_tree_playout")) {
1221 /* Whether to adjust ELO playout probability
1222 * distributions according to matched localtree
1223 * information. */
1224 u->local_tree_playout = !optval || atoi(optval);
1225 } else if (!strcasecmp(optname, "local_tree_pseqroot")) {
1226 /* By default, when we have no sequence move
1227 * to suggest in-playout, we give up. If this
1228 * is on, we make probability distribution from
1229 * sequences first moves instead. */
1230 u->local_tree_pseqroot = !optval || atoi(optval);
1231 } else if (!strcasecmp(optname, "pass_all_alive")) {
1232 /* Whether to consider passing only after all
1233 * dead groups were removed from the board;
1234 * this is like all genmoves are in fact
1235 * kgs-genmove_cleanup. */
1236 u->pass_all_alive = !optval || atoi(optval);
1237 } else if (!strcasecmp(optname, "territory_scoring")) {
1238 /* Use territory scoring (default is area scoring).
1239 * An explicit kgs-rules command overrides this. */
1240 u->territory_scoring = !optval || atoi(optval);
1241 } else if (!strcasecmp(optname, "random_policy_chance") && optval) {
1242 /* If specified (N), with probability 1/N, random_policy policy
1243 * descend is used instead of main policy descend; useful
1244 * if specified policy (e.g. UCB1AMAF) can make unduly biased
1245 * choices sometimes, you can fall back to e.g.
1246 * random_policy=UCB1. */
1247 u->random_policy_chance = atoi(optval);
1248 } else if (!strcasecmp(optname, "max_tree_size") && optval) {
1249 /* Maximum amount of memory [MiB] consumed by the move tree.
1250 * For fast_alloc it includes the temp tree used for pruning.
1251 * Default is 3072 (3 GiB). Note that if you use TM_ROOT,
1252 * this limits size of only one of the trees, not all of them
1253 * together. */
1254 u->max_tree_size = atol(optval) * 1048576;
1255 } else if (!strcasecmp(optname, "fast_alloc")) {
1256 u->fast_alloc = !optval || atoi(optval);
1257 } else if (!strcasecmp(optname, "slave")) {
1258 /* Act as slave for the distributed engine. */
1259 u->slave = !optval || atoi(optval);
1260 } else if (!strcasecmp(optname, "banner") && optval) {
1261 /* Additional banner string. This must come as the
1262 * last engine parameter. */
1263 if (*next) *--next = ',';
1264 u->banner = strdup(optval);
1265 break;
1266 } else {
1267 fprintf(stderr, "uct: Invalid engine argument %s or missing value\n", optname);
1268 exit(1);
1273 u->resign_ratio = 0.2; /* Resign when most games are lost. */
1274 u->loss_threshold = 0.85; /* Stop reading if after at least 2000 playouts this is best value. */
1275 if (!u->policy)
1276 u->policy = policy_ucb1amaf_init(u, NULL);
1278 if (!!u->random_policy_chance ^ !!u->random_policy) {
1279 fprintf(stderr, "uct: Only one of random_policy and random_policy_chance is set\n");
1280 exit(1);
1283 if (!u->local_tree) {
1284 /* No ltree aging. */
1285 u->local_tree_aging = 1.0f;
1287 if (!using_elo)
1288 u->local_tree_playout = false;
1290 if (u->fast_alloc && !u->parallel_tree) {
1291 fprintf(stderr, "fast_alloc not supported with root parallelization.\n");
1292 exit(1);
1294 if (u->fast_alloc)
1295 u->max_tree_size = (100ULL * u->max_tree_size) / (100 + MIN_FREE_MEM_PERCENT);
1297 if (!u->prior)
1298 u->prior = uct_prior_init(NULL, b);
1300 if (!u->playout)
1301 u->playout = playout_moggy_init(NULL, b);
1302 u->playout->debug_level = u->debug_level;
1304 u->ownermap.map = malloc2(board_size2(b) * sizeof(u->ownermap.map[0]));
1305 u->stats = malloc2(board_size2(b) * sizeof(u->stats[0]));
1307 if (!u->dynkomi)
1308 u->dynkomi = uct_dynkomi_init_linear(u, NULL, b);
1310 /* Some things remain uninitialized for now - the opening book
1311 * is not loaded and the tree not set up. */
1312 /* This will be initialized in setup_state() at the first move
1313 * received/requested. This is because right now we are not aware
1314 * about any komi or handicap setup and such. */
1316 return u;
1319 struct engine *
1320 engine_uct_init(char *arg, struct board *b)
1322 struct uct *u = uct_state_init(arg, b);
1323 struct engine *e = calloc2(1, sizeof(struct engine));
1324 e->name = "UCT Engine";
1325 e->printhook = uct_printhook_ownermap;
1326 e->notify_play = uct_notify_play;
1327 e->chat = uct_chat;
1328 e->genmove = uct_genmove;
1329 e->genmoves = uct_genmoves;
1330 e->dead_group_list = uct_dead_group_list;
1331 e->done = uct_done;
1332 e->data = u;
1333 if (u->slave)
1334 e->notify = uct_notify;
1336 const char banner[] = "I'm playing UCT. When I'm losing, I will resign, "
1337 "if I think I win, I play until you pass. "
1338 "Anyone can send me 'winrate' in private chat to get my assessment of the position.";
1339 if (!u->banner) u->banner = "";
1340 e->comment = malloc2(sizeof(banner) + strlen(u->banner) + 1);
1341 sprintf(e->comment, "%s %s", banner, u->banner);
1343 return e;