14 #include "playout/elo.h"
17 #include "uct/dynkomi.h"
18 #include "uct/internal.h"
19 #include "uct/search.h"
25 uct_progress_status(struct uct
*u
, struct tree
*t
, enum stone color
, int playouts
)
31 struct tree_node
*best
= u
->policy
->choose(u
->policy
, t
->root
, t
->board
, color
, resign
);
33 fprintf(stderr
, "... No moves left\n");
36 fprintf(stderr
, "[%d] ", playouts
);
37 fprintf(stderr
, "best %f ", tree_node_get_value(t
, 1, best
->u
.value
));
40 fprintf(stderr
, "deepest % 2d ", t
->max_depth
- t
->root
->depth
);
43 fprintf(stderr
, "| seq ");
44 for (int depth
= 0; depth
< 6; depth
++) {
45 if (best
&& best
->u
.playouts
>= 25) {
46 fprintf(stderr
, "%3s ", coord2sstr(best
->coord
, t
->board
));
47 best
= u
->policy
->choose(u
->policy
, best
, t
->board
, color
, resign
);
54 fprintf(stderr
, "| can ");
56 struct tree_node
*can
[cans
];
57 memset(can
, 0, sizeof(can
));
58 best
= t
->root
->children
;
61 while ((!can
[c
] || best
->u
.playouts
> can
[c
]->u
.playouts
) && ++c
< cans
);
62 for (int d
= 0; d
< c
; d
++) can
[d
] = can
[d
+ 1];
63 if (c
> 0) can
[c
- 1] = best
;
68 fprintf(stderr
, "%3s(%.3f) ",
69 coord2sstr(can
[cans
]->coord
, t
->board
),
70 tree_node_get_value(t
, 1, can
[cans
]->u
.value
));
76 fprintf(stderr
, "\n");
80 struct uct_playout_callback
{
83 struct tree_node
*lnode
;
87 uct_playout_probdist(void *data
, struct board
*b
, enum stone to_play
, struct probdist
*pd
)
89 /* Create probability distribution according to found local tree
91 struct uct_playout_callback
*upc
= data
;
92 assert(upc
&& upc
->tree
&& pd
&& b
);
93 coord_t c
= b
->last_move
.coord
;
94 enum stone color
= b
->last_move
.color
;
97 /* Break local sequence. */
99 } else if (upc
->lnode
) {
100 /* Try to follow local sequence. */
101 upc
->lnode
= tree_get_node(upc
->tree
, upc
->lnode
, c
, false);
104 if (!upc
->lnode
|| !upc
->lnode
->children
) {
105 /* There's no local sequence, start new one! */
106 upc
->lnode
= color
== S_BLACK
? upc
->tree
->ltree_black
: upc
->tree
->ltree_white
;
107 upc
->lnode
= tree_get_node(upc
->tree
, upc
->lnode
, c
, false);
110 if (!upc
->lnode
|| !upc
->lnode
->children
) {
111 /* We have no local sequence and we cannot find any starting
112 * by node corresponding to last move. */
113 if (!upc
->uct
->local_tree_pseqroot
) {
114 /* Give up then, we have nothing to contribute. */
117 /* Construct probability distribution from possible first
118 * sequence move. Remember that @color is color of the
120 upc
->lnode
= color
== S_BLACK
? upc
->tree
->ltree_white
: upc
->tree
->ltree_black
;
121 if (!upc
->lnode
->children
) {
122 /* We don't even have anything in our tree yet. */
127 /* The probdist has the right structure only if BOARD_GAMMA is defined. */
132 /* Construct probability distribution from lnode children. */
133 /* XXX: How to derive the appropriate gamma? */
134 #define li_value(color, li) (li->u.playouts * (color == S_BLACK ? li->u.value : (1 - li->u.value)))
135 #define li_gamma(color, li) (0.5 + li_value(color, li))
136 struct tree_node
*li
= upc
->lnode
->children
;
138 if (is_pass(li
->coord
)) {
140 /* TODO: Spread tenuki gamma over all moves we don't touch. */
143 for (; li
; li
= li
->sibling
) {
144 if (board_at(b
, li
->coord
) != S_NONE
)
146 probdist_set(pd
, li
->coord
, pd
->items
[li
->coord
] * li_gamma(to_play
, li
));
152 uct_leaf_node(struct uct
*u
, struct board
*b
, enum stone player_color
,
153 struct playout_amafmap
*amaf
,
154 struct tree
*t
, struct tree_node
*n
, enum stone node_color
,
157 enum stone next_color
= stone_other(node_color
);
158 int parity
= (next_color
== player_color
? 1 : -1);
160 /* If we don't anticipate well the opponent move during pondering
161 * (the played move has few playouts) we still need more memory
162 * during genmove to explore the tree actually played.
163 * For fast_alloc, the tree compaction will free enough memory
165 unsigned long max_tree_size
= u
->max_tree_size
;
166 if (u
->pondering
&& !u
->fast_alloc
)
167 max_tree_size
= (max_tree_size
* (100 - MIN_FREE_MEM_PERCENT
)) / 100;
169 /* We need to make sure only one thread expands the node. If
170 * we are unlucky enough for two threads to meet in the same
171 * node, the latter one will simply do another simulation from
172 * the node itself, no big deal. t->nodes_size may exceed
173 * the maximum in multi-threaded case but not by much so it's ok.
174 * The size test must be before the test&set not after, to allow
175 * expansion of the node later if enough nodes have been freed. */
176 if (n
->u
.playouts
>= u
->expand_p
&& t
->nodes_size
< max_tree_size
177 && !__sync_lock_test_and_set(&n
->is_expanded
, 1)) {
178 tree_expand_node(t
, n
, b
, next_color
, u
, parity
);
181 fprintf(stderr
, "%s*-- UCT playout #%d start [%s] %f\n",
182 spaces
, n
->u
.playouts
, coord2sstr(n
->coord
, t
->board
),
183 tree_node_get_value(t
, parity
, n
->u
.value
));
185 /* TODO: Don't necessarily restart the sequence walk when entering
187 struct uct_playout_callback upc
= { .uct
= u
, .tree
= t
, .lnode
= NULL
};
188 if (u
->local_tree_playout
) {
189 /* N.B.: We know this is ELO playout. */
190 playout_elo_callback(u
->playout
, uct_playout_probdist
, &upc
);
193 struct playout_setup ps
= { .gamelen
= u
->gamelen
, .mercymin
= u
->mercymin
};
194 int result
= play_random_game(&ps
, b
, next_color
,
195 u
->playout_amaf
? amaf
: NULL
,
196 &u
->ownermap
, u
->playout
);
197 if (next_color
== S_WHITE
) {
198 /* We need the result from black's perspective. */
202 fprintf(stderr
, "%s -- [%d..%d] %s random playout result %d\n",
203 spaces
, player_color
, next_color
, coord2sstr(n
->coord
, t
->board
), result
);
209 scale_value(struct uct
*u
, struct board
*b
, int result
)
211 float rval
= result
> 0;
213 int vp
= u
->val_points
;
215 vp
= board_size(b
) - 1; vp
*= vp
; vp
*= 2;
218 float sval
= (float) abs(result
) / vp
;
219 sval
= sval
> 1 ? 1 : sval
;
220 if (result
< 0) sval
= 1 - sval
;
222 rval
+= u
->val_scale
* sval
;
224 rval
= (1 - u
->val_scale
) * rval
+ u
->val_scale
* sval
;
225 // fprintf(stderr, "score %d => sval %f, rval %f\n", result, sval, rval);
231 record_local_sequence(struct uct
*u
, struct tree
*t
,
232 struct uct_descent
*descent
, int dlen
, int di
,
233 enum stone seq_color
, float rval
)
235 /* Ignore pass sequences. */
236 if (is_pass(descent
[di
].node
->coord
))
239 #define LTREE_DEBUG if (UDEBUGL(6))
240 LTREE_DEBUG
fprintf(stderr
, "recording result %f in local %s sequence: ",
241 rval
, stone2str(seq_color
));
244 /* Pick the right local tree root... */
245 struct tree_node
*lnode
= seq_color
== S_BLACK
? t
->ltree_black
: t
->ltree_white
;
248 /* ...and record the sequence. */
249 while (di
< dlen
&& (di
== di0
|| descent
[di
].node
->d
< u
->tenuki_d
)) {
250 LTREE_DEBUG
fprintf(stderr
, "%s[%d] ",
251 coord2sstr(descent
[di
].node
->coord
, t
->board
),
252 descent
[di
].node
->d
);
253 lnode
= tree_get_node(t
, lnode
, descent
[di
++].node
->coord
, true);
255 stats_add_result(&lnode
->u
, rval
, 1);
258 /* Add lnode for tenuki (pass) if we descended further. */
260 LTREE_DEBUG
fprintf(stderr
, "pass ");
261 lnode
= tree_get_node(t
, lnode
, pass
, true);
263 stats_add_result(&lnode
->u
, rval
, 1);
266 LTREE_DEBUG
fprintf(stderr
, "\n");
271 uct_playout(struct uct
*u
, struct board
*b
, enum stone player_color
, struct tree
*t
)
276 struct playout_amafmap
*amaf
= NULL
;
277 if (u
->policy
->wants_amaf
) {
278 amaf
= calloc2(1, sizeof(*amaf
));
279 amaf
->map
= calloc2(board_size2(&b2
) + 1, sizeof(*amaf
->map
));
280 amaf
->map
++; // -1 is pass
283 /* Walk the tree until we find a leaf, then expand it and do
284 * a random playout. */
285 struct tree_node
*n
= t
->root
;
286 enum stone node_color
= stone_other(player_color
);
287 assert(node_color
== t
->root_color
);
289 /* Tree descent history. */
290 /* XXX: This is somewhat messy since @n and descent[dlen-1].node are
293 struct uct_descent descent
[DLEN
];
294 descent
[0].node
= n
; descent
[0].lnode
= NULL
;
296 /* Total value of the sequence. */
297 struct move_stats seq_value
= { .playouts
= 0 };
300 int pass_limit
= (board_size(&b2
) - 2) * (board_size(&b2
) - 2) / 2;
301 int passes
= is_pass(b
->last_move
.coord
) && b
->moves
> 0;
305 static char spaces
[] = "\0 ";
308 fprintf(stderr
, "--- UCT walk with color %d\n", player_color
);
310 while (!tree_leaf_node(n
) && passes
< 2) {
311 spaces
[depth
++] = ' '; spaces
[depth
] = 0;
314 /*** Choose a node to descend to: */
316 /* Parity is chosen already according to the child color, since
317 * it is applied to children. */
318 node_color
= stone_other(node_color
);
319 int parity
= (node_color
== player_color
? 1 : -1);
322 descent
[dlen
] = descent
[dlen
- 1];
323 if (u
->local_tree
&& (!descent
[dlen
].lnode
|| descent
[dlen
].node
->d
>= u
->tenuki_d
)) {
324 /* Start new local sequence. */
325 /* Remember that node_color already holds color of the
326 * to-be-found child. */
327 descent
[dlen
].lnode
= node_color
== S_BLACK
? t
->ltree_black
: t
->ltree_white
;
330 if (!u
->random_policy_chance
|| fast_random(u
->random_policy_chance
))
331 u
->policy
->descend(u
->policy
, t
, &descent
[dlen
], parity
, b2
.moves
> pass_limit
);
333 u
->random_policy
->descend(u
->random_policy
, t
, &descent
[dlen
], parity
, b2
.moves
> pass_limit
);
336 /*** Perform the descent: */
338 seq_value
.playouts
+= descent
[dlen
].value
.playouts
;
339 seq_value
.value
+= descent
[dlen
].value
.value
* descent
[dlen
].value
.playouts
;
340 n
= descent
[dlen
++].node
;
341 assert(n
== t
->root
|| n
->parent
);
343 fprintf(stderr
, "%s+-- UCT sent us to [%s:%d] %f\n",
344 spaces
, coord2sstr(n
->coord
, t
->board
), n
->coord
,
345 tree_node_get_value(t
, parity
, n
->u
.value
));
347 /* Add virtual loss if we need to; this is used to discourage
348 * other threads from visiting this node in case of multiple
349 * threads doing the tree search. */
351 stats_add_result(&n
->u
, tree_parity(t
, parity
) > 0 ? 0 : 1, 1);
353 assert(n
->coord
>= -1);
354 if (amaf
&& !is_pass(n
->coord
)) {
355 if (amaf
->map
[n
->coord
] == S_NONE
|| amaf
->map
[n
->coord
] == node_color
) {
356 amaf
->map
[n
->coord
] = node_color
;
357 } else { // XXX: Respect amaf->record_nakade
358 amaf_op(amaf
->map
[n
->coord
], +);
360 amaf
->game
[amaf
->gamelen
].coord
= n
->coord
;
361 amaf
->game
[amaf
->gamelen
].color
= node_color
;
363 assert(amaf
->gamelen
< sizeof(amaf
->game
) / sizeof(amaf
->game
[0]));
366 struct move m
= { n
->coord
, node_color
};
367 int res
= board_play(&b2
, &m
);
369 if (res
< 0 || (!is_pass(m
.coord
) && !group_at(&b2
, m
.coord
)) /* suicide */
370 || b2
.superko_violation
) {
372 for (struct tree_node
*ni
= n
; ni
; ni
= ni
->parent
)
373 fprintf(stderr
, "%s<%"PRIhash
"> ", coord2sstr(ni
->coord
, t
->board
), ni
->hash
);
374 fprintf(stderr
, "marking invalid %s node %d,%d res %d group %d spk %d\n",
375 stone2str(node_color
), coord_x(n
->coord
,b
), coord_y(n
->coord
,b
),
376 res
, group_at(&b2
, m
.coord
), b2
.superko_violation
);
378 n
->hints
|= TREE_HINT_INVALID
;
383 if (is_pass(n
->coord
))
390 amaf
->game_baselen
= amaf
->gamelen
;
391 amaf
->record_nakade
= u
->playout_amaf_nakade
;
394 if (t
->use_extra_komi
&& u
->dynkomi
->persim
) {
395 b2
.komi
+= round(u
->dynkomi
->persim(u
->dynkomi
, &b2
, t
, n
));
399 /* XXX: No dead groups support. */
400 float score
= board_official_score(&b2
, NULL
);
401 /* Result from black's perspective (no matter who
402 * the player; black's perspective is always
403 * what the tree stores. */
404 result
= - (score
* 2);
407 fprintf(stderr
, "[%d..%d] %s p-p scoring playout result %d (W %f)\n",
408 player_color
, node_color
, coord2sstr(n
->coord
, t
->board
), result
, score
);
410 board_print(&b2
, stderr
);
412 board_ownermap_fill(&u
->ownermap
, &b2
);
414 } else { assert(u
->parallel_tree
|| tree_leaf_node(n
));
415 /* In case of parallel tree search, the assertion might
416 * not hold if two threads chew on the same node. */
417 result
= uct_leaf_node(u
, &b2
, player_color
, amaf
, t
, n
, node_color
, spaces
);
420 if (amaf
&& u
->playout_amaf_cutoff
) {
421 int cutoff
= amaf
->game_baselen
;
422 cutoff
+= (amaf
->gamelen
- amaf
->game_baselen
) * u
->playout_amaf_cutoff
/ 100;
423 /* Now, reconstruct the amaf map. */
424 memset(amaf
->map
, 0, board_size2(&b2
) * sizeof(*amaf
->map
));
425 for (int i
= 0; i
< cutoff
; i
++) {
426 coord_t coord
= amaf
->game
[i
].coord
;
427 enum stone color
= amaf
->game
[i
].color
;
428 if (amaf
->map
[coord
] == S_NONE
|| amaf
->map
[coord
] == color
) {
429 amaf
->map
[coord
] = color
;
430 /* Nakade always recorded for in-tree part */
431 } else if (amaf
->record_nakade
|| i
<= amaf
->game_baselen
) {
432 amaf_op(amaf
->map
[n
->coord
], +);
437 assert(n
== t
->root
|| n
->parent
);
439 float rval
= scale_value(u
, b
, result
);
440 u
->policy
->update(u
->policy
, t
, n
, node_color
, player_color
, amaf
, rval
);
442 if (t
->use_extra_komi
) {
443 stats_add_result(&u
->dynkomi
->score
, result
/ 2, 1);
444 stats_add_result(&u
->dynkomi
->value
, rval
, 1);
447 if (u
->local_tree
&& n
->parent
&& !is_pass(n
->coord
) && dlen
> 0) {
448 /* Possibly transform the rval appropriately. */
449 float expval
= seq_value
.value
/ seq_value
.playouts
;
450 rval
= stats_temper_value(rval
, expval
, u
->local_tree
);
452 /* Get the local sequences and record them in ltree. */
453 /* We will look for sequence starts in our descent
454 * history, then run record_local_sequence() for each
455 * found sequence start; record_local_sequence() may
456 * pick longer sequences from descent history then,
457 * which is expected as it will create new lnodes. */
458 enum stone seq_color
= player_color
;
459 /* First move always starts a sequence. */
460 record_local_sequence(u
, t
, descent
, dlen
, 1, seq_color
, rval
);
461 seq_color
= stone_other(seq_color
);
462 for (int dseqi
= 2; dseqi
< dlen
; dseqi
++, seq_color
= stone_other(seq_color
)) {
463 if (u
->local_tree_allseq
) {
464 /* We are configured to record all subsequences. */
465 record_local_sequence(u
, t
, descent
, dlen
, dseqi
, seq_color
, rval
);
468 if (descent
[dseqi
].node
->d
>= u
->tenuki_d
) {
469 /* Tenuki! Record the fresh sequence. */
470 record_local_sequence(u
, t
, descent
, dlen
, dseqi
, seq_color
, rval
);
473 if (descent
[dseqi
].lnode
&& !descent
[dseqi
].lnode
) {
474 /* Record result for in-descent picked sequence. */
475 record_local_sequence(u
, t
, descent
, dlen
, dseqi
, seq_color
, rval
);
483 /* We need to undo the virtual loss we added during descend. */
484 if (u
->virtual_loss
) {
485 int parity
= (node_color
== player_color
? 1 : -1);
486 for (; n
->parent
; n
= n
->parent
) {
487 stats_rm_result(&n
->u
, tree_parity(t
, parity
) > 0 ? 0 : 1, 1);
496 board_done_noalloc(&b2
);
501 uct_playouts(struct uct
*u
, struct board
*b
, enum stone color
, struct tree
*t
)
504 for (i
= 0; !uct_halt
; i
++)
505 uct_playout(u
, b
, color
, t
);