1 #ifndef ZZGO_UCT_DYNKOMI_H
2 #define ZZGO_UCT_DYNKOMI_H
4 /* Dynamic computation of artificial komi values to stabilize the MCTS. */
7 #include "uct/internal.h"
/* Motivation: Monte Carlo Tree Search tends to produce unstable and
 * unreasonable results when playing in a situation of extreme advantage
 * or disadvantage, due to poor move selection because of a low
 * signal-to-noise ratio; notably, this occurs when playing a high
 * handicap game, burdening the computer with a further disadvantage
 * against the strong human opponent. */
/* Here, we try to solve the problem by adding arbitrarily computed
 * komi values to the score. The algorithm used is transparent to the
 * rest of the UCT implementation. */
/* Effective komi as seen by @color: @komi is passed through unchanged
 * for S_BLACK and negated for every other color. A positive result
 * means giving komi, a negative result means taking komi. */
#define komi_by_color(komi, color) ((color) != S_BLACK ? -(komi) : (komi))
31 /* Determine base dynamic komi for this genmove run. The returned
32 * value is stored in tree->extra_komi and by itself used just for
33 * user information. */
34 typedef floating_t (*uctd_permove
)(struct uct_dynkomi
*d
, struct board
*b
, struct tree
*tree
);
35 /* Determine actual dynamic komi for this simulation (run on board @b
36 * from node @node). In some cases, this function will just return
37 * tree->extra_komi, in other cases it might want to adjust the komi
38 * according to the actual move depth. */
39 typedef floating_t (*uctd_persim
)(struct uct_dynkomi
*d
, struct board
*b
, struct tree
*tree
, struct tree_node
*node
);
/* Teardown hook: destroys the uct_dynkomi structure @d. */
typedef void (*uctd_done)(struct uct_dynkomi *d);
50 /* Game state for dynkomi use: */
51 /* Information on average score at the simulation end (black's
52 * perspective) since last dynkomi adjustment. */
53 struct move_stats score
;
54 /* Information on average winrate of simulations since last
55 * dynkomi adjustment. */
56 struct move_stats value
;
/* Initializer for the "none" dynkomi variant (going by its name).
 * Takes the UCT engine @u, an argument string @arg and the board @b,
 * and returns a pointer to a struct uct_dynkomi.
 * NOTE(review): the format of @arg and the ownership of the returned
 * structure are defined by the implementation, which is not visible
 * here -- confirm there. */
struct uct_dynkomi *uct_dynkomi_init_none(struct uct *u, char *arg, struct board *b);
/* Initializer for the "linear" dynkomi variant (going by its name).
 * Takes the UCT engine @u, an argument string @arg and the board @b,
 * and returns a pointer to a struct uct_dynkomi.
 * NOTE(review): the format of @arg and the ownership of the returned
 * structure are defined by the implementation, which is not visible
 * here -- confirm there. */
struct uct_dynkomi *uct_dynkomi_init_linear(struct uct *u, char *arg, struct board *b);
/* Initializer for the "adaptive" dynkomi variant (going by its name).
 * Takes the UCT engine @u, an argument string @arg and the board @b,
 * and returns a pointer to a struct uct_dynkomi.
 * NOTE(review): the format of @arg and the ownership of the returned
 * structure are defined by the implementation, which is not visible
 * here -- confirm there. */
struct uct_dynkomi *uct_dynkomi_init_adaptive(struct uct *u, char *arg, struct board *b);