14 #include "playout/moggy.h"
15 #include "playout/light.h"
17 #include "uct/internal.h"
21 struct uct_policy
*policy_ucb1_init(struct uct
*u
, char *arg
);
22 struct uct_policy
*policy_ucb1tuned_init(struct uct
*u
, char *arg
);
23 struct uct_policy
*policy_ucb1amaf_init(struct uct
*u
, char *arg
);
26 #define MC_GAMES 80000
27 #define MC_GAMELEN 400
31 progress_status(struct uct
*u
, struct tree
*t
, enum stone color
, int playouts
)
37 struct tree_node
*best
= u
->policy
->choose(u
->policy
, t
->root
, t
->board
, color
);
39 fprintf(stderr
, "... No moves left\n");
42 fprintf(stderr
, "[%d] ", playouts
);
43 fprintf(stderr
, "best %f ", best
->u
.value
);
46 fprintf(stderr
, "deepest % 2d ", t
->max_depth
- t
->root
->depth
);
49 fprintf(stderr
, "| seq ");
50 for (int depth
= 0; depth
< 6; depth
++) {
51 if (best
&& best
->u
.playouts
>= 25) {
52 fprintf(stderr
, "%3s ", coord2sstr(best
->coord
, t
->board
));
53 best
= u
->policy
->choose(u
->policy
, best
, t
->board
, color
);
60 fprintf(stderr
, "| can ");
62 struct tree_node
*can
[cans
];
63 memset(can
, 0, sizeof(can
));
64 best
= t
->root
->children
;
67 while ((!can
[c
] || best
->u
.playouts
> can
[c
]->u
.playouts
) && ++c
< cans
);
68 for (int d
= 0; d
< c
; d
++) can
[d
] = can
[d
+ 1];
69 if (c
> 0) can
[c
- 1] = best
;
74 fprintf(stderr
, "%3s(%.3f) ", coord2sstr(can
[cans
]->coord
, t
->board
), can
[cans
]->u
.value
);
80 fprintf(stderr
, "\n");
85 uct_leaf_node(struct uct
*u
, struct board
*b
, enum stone player_color
,
86 struct playout_amafmap
*amaf
,
87 struct tree
*t
, struct tree_node
*n
, enum stone node_color
,
90 enum stone next_color
= stone_other(node_color
);
91 if (n
->u
.playouts
>= u
->expand_p
) {
92 // fprintf(stderr, "expanding %s (%p ^-%p)\n", coord2sstr(n->coord, b), n, n->parent);
93 tree_expand_node(t
, n
, b
, next_color
, u
->radar_d
, u
->policy
,
94 (next_color
== player_color
? 1 : -1));
97 fprintf(stderr
, "%s*-- UCT playout #%d start [%s] %f\n",
98 spaces
, n
->u
.playouts
, coord2sstr(n
->coord
, t
->board
), n
->u
.value
);
100 int result
= play_random_game(b
, next_color
, u
->gamelen
, u
->playout_amaf
? amaf
: NULL
, u
->playout
);
101 if (player_color
!= next_color
&& result
>= 0)
104 fprintf(stderr
, "%s -- [%d..%d] %s random playout result %d\n",
105 spaces
, player_color
, next_color
, coord2sstr(n
->coord
, t
->board
), result
);
111 uct_playout(struct uct
*u
, struct board
*b
, enum stone player_color
, struct tree
*t
)
116 struct playout_amafmap
*amaf
= NULL
;
117 if (u
->policy
->wants_amaf
) {
118 amaf
= calloc(1, sizeof(*amaf
));
119 amaf
->map
= calloc(board_size2(&b2
) + 1, sizeof(*amaf
->map
));
120 amaf
->map
++; // -1 is pass
123 /* Walk the tree until we find a leaf, then expand it and do
124 * a random playout. */
125 struct tree_node
*n
= t
->root
;
126 enum stone node_color
= stone_other(player_color
);
127 assert(node_color
== t
->root_color
);
130 int pass_limit
= (board_size(&b2
) - 2) * (board_size(&b2
) - 2) / 2;
131 int passes
= is_pass(b
->last_move
.coord
);
135 static char spaces
[] = "\0 ";
138 fprintf(stderr
, "--- UCT walk with color %d\n", player_color
);
140 while (!tree_leaf_node(n
) && passes
< 2) {
141 spaces
[depth
++] = ' '; spaces
[depth
] = 0;
143 /* Parity is chosen already according to the child color, since
144 * it is applied to children. */
145 node_color
= stone_other(node_color
);
146 n
= u
->policy
->descend(u
->policy
, t
, n
, (node_color
== player_color
? 1 : -1), pass_limit
);
148 assert(n
== t
->root
|| n
->parent
);
150 fprintf(stderr
, "%s+-- UCT sent us to [%s:%d] %f\n",
151 spaces
, coord2sstr(n
->coord
, t
->board
), n
->coord
, n
->u
.value
);
153 if (amaf
&& n
->coord
>= -1 && !is_pass(n
->coord
)) {
154 if (amaf
->map
[n
->coord
] == S_NONE
) {
155 amaf
->map
[n
->coord
] = node_color
;
157 amaf_op(amaf
->map
[n
->coord
], +);
161 struct move m
= { n
->coord
, node_color
};
162 int res
= board_play(&b2
, &m
);
164 if (res
< 0 || (!is_pass(m
.coord
) && !group_at(&b2
, m
.coord
)) /* suicide */
165 || b2
.superko_violation
) {
167 for (struct tree_node
*ni
= n
; ni
; ni
= ni
->parent
)
168 fprintf(stderr
, "%s ", coord2sstr(ni
->coord
, t
->board
));
169 fprintf(stderr
, "deleting invalid %s node %d,%d res %d group %d spk %d\n",
170 stone2str(node_color
), coord_x(n
->coord
,b
), coord_y(n
->coord
,b
),
171 res
, group_at(&b2
, m
.coord
), b2
.superko_violation
);
173 tree_delete_node(t
, n
);
178 if (is_pass(n
->coord
))
185 float score
= board_official_score(&b2
);
186 result
= (player_color
== S_BLACK
) ? score
< 0 : score
> 0;
189 fprintf(stderr
, "[%d..%d] %s p-p scoring playout result %d (W %f)\n",
190 player_color
, node_color
, coord2sstr(n
->coord
, t
->board
), result
, score
);
192 board_print(&b2
, stderr
);
194 } else { assert(tree_leaf_node(n
));
195 result
= uct_leaf_node(u
, &b2
, player_color
, amaf
, t
, n
, node_color
, spaces
);
198 assert(n
== t
->root
|| n
->parent
);
200 u
->policy
->update(u
->policy
, t
, n
, node_color
, player_color
, amaf
, result
);
207 board_done_noalloc(&b2
);
212 prepare_move(struct engine
*e
, struct board
*b
, enum stone color
, coord_t promote
)
214 struct uct
*u
= e
->data
;
216 if (!b
->moves
&& u
->t
) {
217 /* Stale state from last game */
223 u
->t
= tree_init(b
, color
);
225 fast_srandom(u
->force_seed
);
227 fprintf(stderr
, "Fresh board with random seed %lu\n", fast_getseed());
228 //board_print(b, stderr);
229 tree_load(u
->t
, b
, color
);
232 /* XXX: We hope that the opponent didn't suddenly play
233 * several moves in the row. */
234 if (!is_resign(promote
) && !tree_promote_at(u
->t
, b
, promote
)) {
236 fprintf(stderr
, "<cannot find node to promote>\n");
239 u
->t
= tree_init(b
, color
);
243 /* Set in main thread in case the playouts should stop. */
244 static volatile sig_atomic_t halt
= 0;
247 uct_playouts(struct uct
*u
, struct board
*b
, enum stone color
, struct tree
*t
)
249 int i
, games
= u
->games
;
250 if (t
->root
->children
)
251 games
-= t
->root
->u
.playouts
/ 1.5;
252 /* else this is highly read-out but dead-end branch of opening book;
253 * we need to start from scratch; XXX: Maybe actually base the readout
254 * count based on number of playouts of best node? */
255 for (i
= 0; i
< games
; i
++) {
256 int result
= uct_playout(u
, b
, color
, t
);
258 /* Tree descent has hit invalid move. */
262 if (i
> 0 && !(i
% 10000)) {
263 progress_status(u
, t
, color
, i
);
266 if (i
> 0 && !(i
% 500)) {
267 struct tree_node
*best
= u
->policy
->choose(u
->policy
, t
->root
, b
, color
);
268 if (best
&& best
->u
.playouts
>= 5000 && best
->u
.value
>= u
->loss_threshold
)
274 fprintf(stderr
, "<halting early, %d games skipped>\n", games
- i
);
279 progress_status(u
, t
, color
, i
);
281 tree_dump(t
, u
->dumpthres
);
285 static pthread_mutex_t finish_mutex
= PTHREAD_MUTEX_INITIALIZER
;
286 static pthread_cond_t finish_cond
= PTHREAD_COND_INITIALIZER
;
287 static volatile int finish_thread
;
288 static pthread_mutex_t finish_serializer
= PTHREAD_MUTEX_INITIALIZER
;
301 spawn_helper(void *ctx_
)
303 struct spawn_ctx
*ctx
= ctx_
;
305 fast_srandom(ctx
->seed
);
307 ctx
->games
= uct_playouts(ctx
->u
, ctx
->b
, ctx
->color
, ctx
->t
);
309 pthread_mutex_lock(&finish_serializer
);
310 pthread_mutex_lock(&finish_mutex
);
311 finish_thread
= ctx
->tid
;
312 pthread_cond_signal(&finish_cond
);
313 pthread_mutex_unlock(&finish_mutex
);
318 uct_notify_play(struct engine
*e
, struct board
*b
, struct move
*m
)
320 prepare_move(e
, b
, stone_other(m
->color
), m
->coord
);
324 uct_genmove(struct engine
*e
, struct board
*b
, enum stone color
)
326 struct uct
*u
= e
->data
;
329 prepare_move(e
, b
, color
, resign
);
331 int played_games
= 0;
333 played_games
= uct_playouts(u
, b
, color
, u
->t
);
335 pthread_t threads
[u
->threads
];
338 pthread_mutex_lock(&finish_mutex
);
339 /* Spawn threads... */
340 for (int ti
= 0; ti
< u
->threads
; ti
++) {
341 struct spawn_ctx
*ctx
= malloc(sizeof(*ctx
));
342 ctx
->u
= u
; ctx
->b
= b
; ctx
->color
= color
;
343 ctx
->t
= tree_copy(u
->t
); ctx
->tid
= ti
;
344 ctx
->seed
= fast_random(65536) + ti
;
345 pthread_create(&threads
[ti
], NULL
, spawn_helper
, ctx
);
347 fprintf(stderr
, "Spawned thread %d\n", ti
);
349 /* ...and collect them back: */
350 while (joined
< u
->threads
) {
351 /* Wait for some thread to finish... */
352 pthread_cond_wait(&finish_cond
, &finish_mutex
);
353 /* ...and gather its remnants. */
354 struct spawn_ctx
*ctx
;
355 pthread_join(threads
[finish_thread
], (void **) &ctx
);
356 played_games
+= ctx
->games
;
358 tree_merge(u
->t
, ctx
->t
);
362 fprintf(stderr
, "Joined thread %d\n", finish_thread
);
363 /* Do not get stalled by slow threads. */
364 if (joined
>= u
->threads
/ 2)
366 pthread_mutex_unlock(&finish_serializer
);
368 pthread_mutex_unlock(&finish_mutex
);
372 tree_dump(u
->t
, u
->dumpthres
);
374 struct tree_node
*best
= u
->policy
->choose(u
->policy
, u
->t
->root
, b
, color
);
376 tree_done(u
->t
); u
->t
= NULL
;
377 return coord_copy(pass
);
380 progress_status(u
, u
->t
, color
, played_games
);
382 fprintf(stderr
, "*** WINNER is %s (%d,%d) with score %1.4f (%d/%d:%d games)\n", coord2sstr(best
->coord
, b
), coord_x(best
->coord
, b
), coord_y(best
->coord
, b
), best
->u
.value
, best
->u
.playouts
, u
->t
->root
->u
.playouts
, played_games
);
383 if (best
->u
.value
< u
->resign_ratio
&& !is_pass(best
->coord
)) {
384 tree_done(u
->t
); u
->t
= NULL
;
385 return coord_copy(resign
);
387 tree_promote_node(u
->t
, best
);
388 return coord_copy(best
->coord
);
392 uct_genbook(struct engine
*e
, struct board
*b
, enum stone color
)
394 struct uct
*u
= e
->data
;
395 u
->t
= tree_init(b
, color
);
396 tree_load(u
->t
, b
, color
);
399 for (i
= 0; i
< u
->games
; i
++) {
400 int result
= uct_playout(u
, b
, color
, u
->t
);
402 /* Tree descent has hit invalid move. */
406 if (i
> 0 && !(i
% 10000)) {
407 progress_status(u
, u
->t
, color
, i
);
410 progress_status(u
, u
->t
, color
, i
);
412 tree_save(u
->t
, b
, u
->games
/ 100);
420 uct_dumpbook(struct engine
*e
, struct board
*b
, enum stone color
)
422 struct uct
*u
= e
->data
;
423 u
->t
= tree_init(b
, color
);
424 tree_load(u
->t
, b
, color
);
431 uct_state_init(char *arg
)
433 struct uct
*u
= calloc(1, sizeof(struct uct
));
437 u
->gamelen
= MC_GAMELEN
;
440 u
->playout_amaf
= false;
443 char *optspec
, *next
= arg
;
446 next
+= strcspn(next
, ",");
447 if (*next
) { *next
++ = 0; } else { *next
= 0; }
449 char *optname
= optspec
;
450 char *optval
= strchr(optspec
, '=');
451 if (optval
) *optval
++ = 0;
453 if (!strcasecmp(optname
, "debug")) {
455 u
->debug_level
= atoi(optval
);
458 } else if (!strcasecmp(optname
, "games") && optval
) {
459 u
->games
= atoi(optval
);
460 } else if (!strcasecmp(optname
, "gamelen") && optval
) {
461 u
->gamelen
= atoi(optval
);
462 } else if (!strcasecmp(optname
, "expand_p") && optval
) {
463 u
->expand_p
= atoi(optval
);
464 } else if (!strcasecmp(optname
, "radar_d") && optval
) {
465 /* For 19x19, it is good idea to set this to 3. */
466 u
->radar_d
= atoi(optval
);
467 } else if (!strcasecmp(optname
, "dumpthres") && optval
) {
468 u
->dumpthres
= atoi(optval
);
469 } else if (!strcasecmp(optname
, "playout_amaf")) {
470 /* Whether to include random playout moves in
471 * AMAF as well. (Otherwise, only tree moves
472 * are included in AMAF. Of course makes sense
473 * only in connection with an AMAF policy.) */
474 /* with-without: 55.5% (+-4.1) */
475 if (optval
&& *optval
== '0')
476 u
->playout_amaf
= false;
478 u
->playout_amaf
= true;
479 } else if (!strcasecmp(optname
, "policy") && optval
) {
480 char *policyarg
= strchr(optval
, ':');
483 if (!strcasecmp(optval
, "ucb1")) {
484 u
->policy
= policy_ucb1_init(u
, policyarg
);
485 } else if (!strcasecmp(optval
, "ucb1tuned")) {
486 u
->policy
= policy_ucb1tuned_init(u
, policyarg
);
487 } else if (!strcasecmp(optval
, "ucb1amaf")) {
488 u
->policy
= policy_ucb1amaf_init(u
, policyarg
);
490 fprintf(stderr
, "UCT: Invalid tree policy %s\n", optval
);
492 } else if (!strcasecmp(optname
, "playout") && optval
) {
493 char *playoutarg
= strchr(optval
, ':');
496 if (!strcasecmp(optval
, "moggy")) {
497 u
->playout
= playout_moggy_init(playoutarg
);
498 } else if (!strcasecmp(optval
, "light")) {
499 u
->playout
= playout_light_init(playoutarg
);
501 fprintf(stderr
, "UCT: Invalid playout policy %s\n", optval
);
503 } else if (!strcasecmp(optname
, "threads") && optval
) {
504 u
->threads
= atoi(optval
);
505 } else if (!strcasecmp(optname
, "force_seed") && optval
) {
506 u
->force_seed
= atoi(optval
);
508 fprintf(stderr
, "uct: Invalid engine argument %s or missing value\n", optname
);
513 u
->resign_ratio
= 0.2; /* Resign when most games are lost. */
514 u
->loss_threshold
= 0.85; /* Stop reading if after at least 5000 playouts this is best value. */
516 u
->policy
= policy_ucb1amaf_init(u
, NULL
);
519 u
->playout
= playout_moggy_init(NULL
);
520 u
->playout
->debug_level
= u
->debug_level
;
527 engine_uct_init(char *arg
)
529 struct uct
*u
= uct_state_init(arg
);
530 struct engine
*e
= calloc(1, sizeof(struct engine
));
531 e
->name
= "UCT Engine";
532 e
->comment
= "I'm playing UCT. When we both pass, I will consider all the stones on the board alive. If you are reading this, write 'yes'. Please bear with me at the game end, I need to fill the whole board; if you help me, we will both be happier. Filling the board will not lose points (NZ rules).";
533 e
->genmove
= uct_genmove
;
534 e
->notify_play
= uct_notify_play
;