14 #include "playout/moggy.h"
15 #include "playout/light.h"
17 #include "uct/internal.h"
18 #include "uct/prior.h"
22 struct uct_policy
*policy_ucb1_init(struct uct
*u
, char *arg
);
23 struct uct_policy
*policy_ucb1tuned_init(struct uct
*u
, char *arg
);
24 struct uct_policy
*policy_ucb1amaf_init(struct uct
*u
, char *arg
);
27 #define MC_GAMES 80000
28 #define MC_GAMELEN MAX_GAMELEN
32 can_pass(struct board
*b
, enum stone color
)
34 float score
= board_official_score(b
);
37 //fprintf(stderr, "%d score %f\n", color, score);
42 progress_status(struct uct
*u
, struct tree
*t
, enum stone color
, int playouts
)
48 struct tree_node
*best
= u
->policy
->choose(u
->policy
, t
->root
, t
->board
, color
);
50 fprintf(stderr
, "... No moves left\n");
53 fprintf(stderr
, "[%d] ", playouts
);
54 fprintf(stderr
, "best %f ", tree_node_get_value(t
, best
, u
, 1));
57 fprintf(stderr
, "deepest % 2d ", t
->max_depth
- t
->root
->depth
);
60 fprintf(stderr
, "| seq ");
61 for (int depth
= 0; depth
< 6; depth
++) {
62 if (best
&& best
->u
.playouts
>= 25) {
63 fprintf(stderr
, "%3s ", coord2sstr(best
->coord
, t
->board
));
64 best
= u
->policy
->choose(u
->policy
, best
, t
->board
, color
);
71 fprintf(stderr
, "| can ");
73 struct tree_node
*can
[cans
];
74 memset(can
, 0, sizeof(can
));
75 best
= t
->root
->children
;
78 while ((!can
[c
] || best
->u
.playouts
> can
[c
]->u
.playouts
) && ++c
< cans
);
79 for (int d
= 0; d
< c
; d
++) can
[d
] = can
[d
+ 1];
80 if (c
> 0) can
[c
- 1] = best
;
85 fprintf(stderr
, "%3s(%.3f) ",
86 coord2sstr(can
[cans
]->coord
, t
->board
),
87 tree_node_get_value(t
, can
[cans
], u
, 1));
93 fprintf(stderr
, "\n");
98 uct_leaf_node(struct uct
*u
, struct board
*b
, enum stone player_color
,
99 struct playout_amafmap
*amaf
,
100 struct tree
*t
, struct tree_node
*n
, enum stone node_color
,
103 enum stone next_color
= stone_other(node_color
);
104 int parity
= (next_color
== player_color
? 1 : -1);
105 if (n
->u
.playouts
>= u
->expand_p
) {
106 // fprintf(stderr, "expanding %s (%p ^-%p)\n", coord2sstr(n->coord, b), n, n->parent);
107 tree_expand_node(t
, n
, b
, next_color
, u
->radar_d
, u
, parity
);
110 fprintf(stderr
, "%s*-- UCT playout #%d start [%s] %f\n",
111 spaces
, n
->u
.playouts
, coord2sstr(n
->coord
, t
->board
),
112 tree_node_get_value(t
, n
, u
, parity
));
114 int result
= play_random_game(b
, next_color
, u
->gamelen
, u
->playout_amaf
? amaf
: NULL
, u
->playout
);
115 if (next_color
== S_WHITE
&& result
>= 0) {
116 /* We need the result from black's perspective. */
120 fprintf(stderr
, "%s -- [%d..%d] %s random playout result %d\n",
121 spaces
, player_color
, next_color
, coord2sstr(n
->coord
, t
->board
), result
);
127 uct_playout(struct uct
*u
, struct board
*b
, enum stone player_color
, struct tree
*t
)
132 struct playout_amafmap
*amaf
= NULL
;
133 if (u
->policy
->wants_amaf
) {
134 amaf
= calloc(1, sizeof(*amaf
));
135 amaf
->map
= calloc(board_size2(&b2
) + 1, sizeof(*amaf
->map
));
136 amaf
->map
++; // -1 is pass
139 /* Walk the tree until we find a leaf, then expand it and do
140 * a random playout. */
141 struct tree_node
*n
= t
->root
;
142 enum stone node_color
= stone_other(player_color
);
143 assert(node_color
== t
->root_color
);
146 int pass_limit
= (board_size(&b2
) - 2) * (board_size(&b2
) - 2) / 2;
147 int passes
= is_pass(b
->last_move
.coord
);
151 static char spaces
[] = "\0 ";
154 fprintf(stderr
, "--- UCT walk with color %d\n", player_color
);
156 while (!tree_leaf_node(n
) && passes
< 2) {
157 spaces
[depth
++] = ' '; spaces
[depth
] = 0;
159 /* Parity is chosen already according to the child color, since
160 * it is applied to children. */
161 node_color
= stone_other(node_color
);
162 int parity
= (node_color
== player_color
? 1 : -1);
163 n
= u
->policy
->descend(u
->policy
, t
, n
, parity
, pass_limit
);
165 assert(n
== t
->root
|| n
->parent
);
167 fprintf(stderr
, "%s+-- UCT sent us to [%s:%d] %f\n",
168 spaces
, coord2sstr(n
->coord
, t
->board
), n
->coord
,
169 tree_node_get_value(t
, n
, u
, parity
));
171 assert(n
->coord
>= -1);
172 if (amaf
&& !is_pass(n
->coord
)) {
173 if (amaf
->map
[n
->coord
] == S_NONE
|| amaf
->map
[n
->coord
] == node_color
) {
174 amaf
->map
[n
->coord
] = node_color
;
175 } else { // XXX: Respect amaf->record_nakade
176 amaf_op(amaf
->map
[n
->coord
], +);
178 amaf
->game
[amaf
->gamelen
].coord
= n
->coord
;
179 amaf
->game
[amaf
->gamelen
].color
= node_color
;
181 assert(amaf
->gamelen
< sizeof(amaf
->game
) / sizeof(amaf
->game
[0]));
184 struct move m
= { n
->coord
, node_color
};
185 int res
= board_play(&b2
, &m
);
187 if (res
< 0 || (!is_pass(m
.coord
) && !group_at(&b2
, m
.coord
)) /* suicide */
188 || b2
.superko_violation
) {
190 for (struct tree_node
*ni
= n
; ni
; ni
= ni
->parent
)
191 fprintf(stderr
, "%s<%lld> ", coord2sstr(ni
->coord
, t
->board
), ni
->hash
);
192 fprintf(stderr
, "deleting invalid %s node %d,%d res %d group %d spk %d\n",
193 stone2str(node_color
), coord_x(n
->coord
,b
), coord_y(n
->coord
,b
),
194 res
, group_at(&b2
, m
.coord
), b2
.superko_violation
);
196 tree_delete_node(t
, n
);
201 if (is_pass(n
->coord
))
208 amaf
->game_baselen
= amaf
->gamelen
;
209 amaf
->record_nakade
= u
->playout_amaf_nakade
;
213 float score
= board_official_score(&b2
);
214 /* Result from black's perspective (no matter who
215 * the player; black's perspective is always
216 * what the tree stores. */
220 fprintf(stderr
, "[%d..%d] %s p-p scoring playout result %d (W %f)\n",
221 player_color
, node_color
, coord2sstr(n
->coord
, t
->board
), result
, score
);
223 board_print(&b2
, stderr
);
225 } else { assert(tree_leaf_node(n
));
226 result
= uct_leaf_node(u
, &b2
, player_color
, amaf
, t
, n
, node_color
, spaces
);
229 if (amaf
&& u
->playout_amaf_cutoff
) {
230 int cutoff
= amaf
->game_baselen
;
231 cutoff
+= (amaf
->gamelen
- amaf
->game_baselen
) * u
->playout_amaf_cutoff
/ 100;
232 /* Now, reconstruct the amaf map. */
233 memset(amaf
->map
, 0, board_size2(&b2
) * sizeof(*amaf
->map
));
234 for (int i
= 0; i
< cutoff
; i
++) {
235 coord_t coord
= amaf
->game
[i
].coord
;
236 enum stone color
= amaf
->game
[i
].color
;
237 if (amaf
->map
[coord
] == S_NONE
|| amaf
->map
[coord
] == color
) {
238 amaf
->map
[coord
] = color
;
239 /* Nakade always recorded for in-tree part */
240 } else if (amaf
->record_nakade
|| i
<= amaf
->game_baselen
) {
241 amaf_op(amaf
->map
[n
->coord
], +);
246 assert(n
== t
->root
|| n
->parent
);
248 u
->policy
->update(u
->policy
, t
, n
, node_color
, player_color
, amaf
, result
);
255 board_done_noalloc(&b2
);
260 prepare_move(struct engine
*e
, struct board
*b
, enum stone color
, coord_t promote
)
262 struct uct
*u
= e
->data
;
264 if (u
->t
&& (!b
->moves
|| color
!= stone_other(u
->t
->root_color
))) {
265 /* Stale state from last game */
271 u
->t
= tree_init(b
, color
);
273 fast_srandom(u
->force_seed
);
275 fprintf(stderr
, "Fresh board with random seed %lu\n", fast_getseed());
276 //board_print(b, stderr);
277 if (!u
->no_book
&& b
->moves
< 2)
281 /* XXX: We hope that the opponent didn't suddenly play
282 * several moves in the row. */
283 if (!is_resign(promote
) && !tree_promote_at(u
->t
, b
, promote
)) {
285 fprintf(stderr
, "<cannot find node to promote>\n");
288 u
->t
= tree_init(b
, color
);
292 /* Set in main thread in case the playouts should stop. */
293 static volatile sig_atomic_t halt
= 0;
296 uct_playouts(struct uct
*u
, struct board
*b
, enum stone color
, struct tree
*t
)
298 int i
, games
= u
->games
;
299 if (t
->root
->children
)
300 games
-= t
->root
->u
.playouts
/ 1.5;
301 /* else this is highly read-out but dead-end branch of opening book;
302 * we need to start from scratch; XXX: Maybe actually base the readout
303 * count based on number of playouts of best node? */
304 for (i
= 0; i
< games
; i
++) {
305 int result
= uct_playout(u
, b
, color
, t
);
307 /* Tree descent has hit invalid move. */
311 if (i
> 0 && !(i
% 10000)) {
312 progress_status(u
, t
, color
, i
);
315 if (i
> 0 && !(i
% 500)) {
316 struct tree_node
*best
= u
->policy
->choose(u
->policy
, t
->root
, b
, color
);
317 if (best
&& ((best
->u
.playouts
>= 5000 && tree_node_get_value(t
, best
, u
, 1) >= u
->loss_threshold
)
318 || (best
->u
.playouts
>= 500 && tree_node_get_value(t
, best
, u
, 1) >= 0.95)))
324 fprintf(stderr
, "<halting early, %d games skipped>\n", games
- i
);
329 progress_status(u
, t
, color
, i
);
331 tree_dump(t
, u
->dumpthres
);
335 static pthread_mutex_t finish_mutex
= PTHREAD_MUTEX_INITIALIZER
;
336 static pthread_cond_t finish_cond
= PTHREAD_COND_INITIALIZER
;
337 static volatile int finish_thread
;
338 static pthread_mutex_t finish_serializer
= PTHREAD_MUTEX_INITIALIZER
;
351 spawn_helper(void *ctx_
)
353 struct spawn_ctx
*ctx
= ctx_
;
355 fast_srandom(ctx
->seed
);
357 ctx
->games
= uct_playouts(ctx
->u
, ctx
->b
, ctx
->color
, ctx
->t
);
359 pthread_mutex_lock(&finish_serializer
);
360 pthread_mutex_lock(&finish_mutex
);
361 finish_thread
= ctx
->tid
;
362 pthread_cond_signal(&finish_cond
);
363 pthread_mutex_unlock(&finish_mutex
);
368 uct_notify_play(struct engine
*e
, struct board
*b
, struct move
*m
)
370 prepare_move(e
, b
, m
->color
, m
->coord
);
374 uct_genmove(struct engine
*e
, struct board
*b
, enum stone color
)
376 struct uct
*u
= e
->data
;
379 prepare_move(e
, b
, color
, resign
);
381 if (b
->superko_violation
) {
382 fprintf(stderr
, "!!! WARNING: SUPERKO VIOLATION OCCURED BEFORE THIS MOVE\n");
383 fprintf(stderr
, "Maybe you play with situational instead of positional superko?\n");
384 fprintf(stderr
, "I'm going to ignore the violation, but note that I may miss\n");
385 fprintf(stderr
, "some moves valid under this ruleset because of this.\n");
386 b
->superko_violation
= false;
389 /* If the opponent just passes and we win counting, just
391 if (b
->moves
> 1 && is_pass(b
->last_move
.coord
) && can_pass(b
, color
))
392 return coord_copy(pass
);
394 int played_games
= 0;
396 played_games
= uct_playouts(u
, b
, color
, u
->t
);
398 pthread_t threads
[u
->threads
];
401 pthread_mutex_lock(&finish_mutex
);
402 /* Spawn threads... */
403 for (int ti
= 0; ti
< u
->threads
; ti
++) {
404 struct spawn_ctx
*ctx
= malloc(sizeof(*ctx
));
405 ctx
->u
= u
; ctx
->b
= b
; ctx
->color
= color
;
406 ctx
->t
= tree_copy(u
->t
); ctx
->tid
= ti
;
407 ctx
->seed
= fast_random(65536) + ti
;
408 pthread_create(&threads
[ti
], NULL
, spawn_helper
, ctx
);
410 fprintf(stderr
, "Spawned thread %d\n", ti
);
412 /* ...and collect them back: */
413 while (joined
< u
->threads
) {
414 /* Wait for some thread to finish... */
415 pthread_cond_wait(&finish_cond
, &finish_mutex
);
416 /* ...and gather its remnants. */
417 struct spawn_ctx
*ctx
;
418 pthread_join(threads
[finish_thread
], (void **) &ctx
);
419 played_games
+= ctx
->games
;
421 tree_merge(u
->t
, ctx
->t
, u
->amaf_prior
);
425 fprintf(stderr
, "Joined thread %d\n", finish_thread
);
426 /* Do not get stalled by slow threads. */
427 if (joined
>= u
->threads
/ 2)
429 pthread_mutex_unlock(&finish_serializer
);
431 pthread_mutex_unlock(&finish_mutex
);
433 tree_normalize(u
->t
, u
->threads
);
437 tree_dump(u
->t
, u
->dumpthres
);
439 struct tree_node
*best
= u
->policy
->choose(u
->policy
, u
->t
->root
, b
, color
);
441 tree_done(u
->t
); u
->t
= NULL
;
442 return coord_copy(pass
);
445 progress_status(u
, u
->t
, color
, played_games
);
447 fprintf(stderr
, "*** WINNER is %s (%d,%d) with score %1.4f (%d/%d:%d games)\n",
448 coord2sstr(best
->coord
, b
), coord_x(best
->coord
, b
), coord_y(best
->coord
, b
),
449 tree_node_get_value(u
->t
, best
, u
, 1),
450 best
->u
.playouts
, u
->t
->root
->u
.playouts
, played_games
);
451 if (tree_node_get_value(u
->t
, best
, u
, 1) < u
->resign_ratio
&& !is_pass(best
->coord
)) {
452 tree_done(u
->t
); u
->t
= NULL
;
453 return coord_copy(resign
);
455 tree_promote_node(u
->t
, best
);
456 return coord_copy(best
->coord
);
460 uct_genbook(struct engine
*e
, struct board
*b
, enum stone color
)
462 struct uct
*u
= e
->data
;
463 u
->t
= tree_init(b
, color
);
467 for (i
= 0; i
< u
->games
; i
++) {
468 int result
= uct_playout(u
, b
, color
, u
->t
);
470 /* Tree descent has hit invalid move. */
474 if (i
> 0 && !(i
% 10000)) {
475 progress_status(u
, u
->t
, color
, i
);
478 progress_status(u
, u
->t
, color
, i
);
480 tree_save(u
->t
, b
, u
->games
/ 100);
488 uct_dumpbook(struct engine
*e
, struct board
*b
, enum stone color
)
490 struct uct
*u
= e
->data
;
491 u
->t
= tree_init(b
, color
);
499 uct_state_init(char *arg
)
501 struct uct
*u
= calloc(1, sizeof(struct uct
));
505 u
->gamelen
= MC_GAMELEN
;
508 u
->playout_amaf
= true;
509 u
->playout_amaf_nakade
= false;
510 u
->amaf_prior
= true;
512 // gp: 14 vs 0: 44% (+-3.5)
514 u
->even_eqex
= u
->policy_eqex
= u
->b19_eqex
= u
->cfgd_eqex
= -1;
515 u
->eqex
= 6; /* Even number! */
518 char *optspec
, *next
= arg
;
521 next
+= strcspn(next
, ",");
522 if (*next
) { *next
++ = 0; } else { *next
= 0; }
524 char *optname
= optspec
;
525 char *optval
= strchr(optspec
, '=');
526 if (optval
) *optval
++ = 0;
528 if (!strcasecmp(optname
, "debug")) {
530 u
->debug_level
= atoi(optval
);
533 } else if (!strcasecmp(optname
, "games") && optval
) {
534 u
->games
= atoi(optval
);
535 } else if (!strcasecmp(optname
, "gamelen") && optval
) {
536 u
->gamelen
= atoi(optval
);
537 } else if (!strcasecmp(optname
, "expand_p") && optval
) {
538 u
->expand_p
= atoi(optval
);
539 } else if (!strcasecmp(optname
, "radar_d") && optval
) {
540 /* For 19x19, it is good idea to set this to 3. */
541 u
->radar_d
= atoi(optval
);
542 } else if (!strcasecmp(optname
, "dumpthres") && optval
) {
543 u
->dumpthres
= atoi(optval
);
544 } else if (!strcasecmp(optname
, "playout_amaf")) {
545 /* Whether to include random playout moves in
546 * AMAF as well. (Otherwise, only tree moves
547 * are included in AMAF. Of course makes sense
548 * only in connection with an AMAF policy.) */
549 /* with-without: 55.5% (+-4.1) */
550 if (optval
&& *optval
== '0')
551 u
->playout_amaf
= false;
553 u
->playout_amaf
= true;
554 } else if (!strcasecmp(optname
, "playout_amaf_nakade")) {
555 /* Whether to include nakade moves from playouts
556 * in the AMAF statistics; this tends to nullify
557 * the playout_amaf effect by adding too much
559 if (optval
&& *optval
== '0')
560 u
->playout_amaf_nakade
= false;
562 u
->playout_amaf_nakade
= true;
563 } else if (!strcasecmp(optname
, "playout_amaf_cutoff") && optval
) {
564 /* Keep only first N% of playout stage AMAF
566 u
->playout_amaf_cutoff
= atoi(optval
);
567 } else if (!strcasecmp(optname
, "policy") && optval
) {
568 char *policyarg
= strchr(optval
, ':');
571 if (!strcasecmp(optval
, "ucb1")) {
572 u
->policy
= policy_ucb1_init(u
, policyarg
);
573 } else if (!strcasecmp(optval
, "ucb1tuned")) {
574 u
->policy
= policy_ucb1tuned_init(u
, policyarg
);
575 } else if (!strcasecmp(optval
, "ucb1amaf")) {
576 u
->policy
= policy_ucb1amaf_init(u
, policyarg
);
578 fprintf(stderr
, "UCT: Invalid tree policy %s\n", optval
);
580 } else if (!strcasecmp(optname
, "playout") && optval
) {
581 char *playoutarg
= strchr(optval
, ':');
584 if (!strcasecmp(optval
, "moggy")) {
585 u
->playout
= playout_moggy_init(playoutarg
);
586 } else if (!strcasecmp(optval
, "light")) {
587 u
->playout
= playout_light_init(playoutarg
);
589 fprintf(stderr
, "UCT: Invalid playout policy %s\n", optval
);
591 } else if (!strcasecmp(optname
, "prior") && optval
) {
592 u
->eqex
= atoi(optval
);
593 } else if (!strcasecmp(optname
, "prior_even") && optval
) {
594 u
->even_eqex
= atoi(optval
);
595 } else if (!strcasecmp(optname
, "prior_gp") && optval
) {
596 u
->gp_eqex
= atoi(optval
);
597 } else if (!strcasecmp(optname
, "prior_policy") && optval
) {
598 u
->policy_eqex
= atoi(optval
);
599 } else if (!strcasecmp(optname
, "prior_b19") && optval
) {
600 u
->b19_eqex
= atoi(optval
);
601 } else if (!strcasecmp(optname
, "prior_cfgd") && optval
) {
602 u
->cfgd_eqex
= atoi(optval
);
603 } else if (!strcasecmp(optname
, "amaf_prior") && optval
) {
604 u
->amaf_prior
= atoi(optval
);
605 } else if (!strcasecmp(optname
, "threads") && optval
) {
606 u
->threads
= atoi(optval
);
607 } else if (!strcasecmp(optname
, "force_seed") && optval
) {
608 u
->force_seed
= atoi(optval
);
609 } else if (!strcasecmp(optname
, "no_book")) {
612 fprintf(stderr
, "uct: Invalid engine argument %s or missing value\n", optname
);
617 if (u
->even_eqex
< 0) u
->even_eqex
= u
->eqex
;
618 if (u
->gp_eqex
< 0) u
->gp_eqex
= u
->eqex
;
619 if (u
->policy_eqex
< 0) u
->policy_eqex
= u
->eqex
;
620 if (u
->b19_eqex
< 0) u
->b19_eqex
= u
->eqex
;
621 if (u
->cfgd_eqex
< 0) u
->cfgd_eqex
= u
->eqex
;
623 u
->resign_ratio
= 0.2; /* Resign when most games are lost. */
624 u
->loss_threshold
= 0.85; /* Stop reading if after at least 5000 playouts this is best value. */
626 u
->policy
= policy_ucb1amaf_init(u
, NULL
);
629 u
->playout
= playout_moggy_init(NULL
);
630 u
->playout
->debug_level
= u
->debug_level
;
637 engine_uct_init(char *arg
)
639 struct uct
*u
= uct_state_init(arg
);
640 struct engine
*e
= calloc(1, sizeof(struct engine
));
641 e
->name
= "UCT Engine";
642 e
->comment
= "I'm playing UCT. When we both pass, I will consider all the stones on the board alive. If you are reading this, write 'yes'. Please bear with me at the game end, I need to fill the whole board; if you help me, we will both be happier. Filling the board will not lose points (NZ rules).";
643 e
->genmove
= uct_genmove
;
644 e
->notify_play
= uct_notify_play
;