uct_bestmove(): Dissolve back in uct_genmove()
[pachi/json.git] / uct / dynkomi.h
blob6a5a56be5e2f876ecd13adea062a264b832657c3
1 #ifndef ZZGO_UCT_DYNKOMI_H
2 #define ZZGO_UCT_DYNKOMI_H
4 /* Dynamic computation of artificial komi values to stabilize the MCTS. */
6 #include "move.h"
7 #include "uct/internal.h"
8 #include "uct/tree.h"
10 /* Motivation: Monte Carlo Tree Search tends to produce unstable and
11 * unreasonable results when playing in situations of extreme advantage
12 * or disadvantage, due to poor move selection because of low
13 * signal-to-noise ratio; notably, this occurs when playing in a high
14 * handicap game, burdening the computer with further disadvantage
15 * against the strong human opponent. */
17 /* Here, we try to solve the problem by adding arbitrarily computed
18 * komi values to the score. The algorithm used is transparent to the
19 * rest of the UCT implementation. */
21 struct board; /* Forward declarations, so that the typedefs and prototypes below can take pointers to these types. */
22 struct tree;
23 struct tree_node;
24 struct uct;
25 struct uct_dynkomi;
27 /* Determine base dynamic komi for this genmove run. The returned
28 * value is stored in tree->extra_komi and by itself used just for
29 * user information. @d is the dynkomi instance the callback belongs
29 * to, @b the board and @tree the tree the value is computed for. */
30 typedef float (*uctd_permove)(struct uct_dynkomi *d, struct board *b, struct tree *tree);
31 /* Determine actual dynamic komi for this simulation (run on board @b
32 * from node @node). In some cases, this function will just return
33 * tree->extra_komi, in other cases it might want to adjust the komi
34 * according to the actual move depth. @d is the dynkomi instance the
34 * callback belongs to and @tree the tree that contains @node. */
35 typedef float (*uctd_persim)(struct uct_dynkomi *d, struct board *b, struct tree *tree, struct tree_node *node);
36 /* Destroy the uct_dynkomi structure @d. Returns nothing.
36 * NOTE(review): presumably the implementation is also expected to
36 * release whatever d->data points to - confirm in the implementations. */
37 typedef void (*uctd_done)(struct uct_dynkomi *d);
39 struct uct_dynkomi { /* One dynamic komi policy instance: its callbacks plus the statistics it works with. */
40 struct uct *uct; /* UCT engine state; presumably the engine this instance was created for - confirm. */
41 uctd_permove permove; /* Computes the base dynamic komi for a genmove run. */
42 uctd_persim persim; /* Computes the dynamic komi for a single simulation. */
43 uctd_done done; /* Destroys this structure. */
44 void *data; /* Opaque pointer; presumably policy-private state - confirm in the implementations. */
46 /* Game state for dynkomi use: */
47 /* Information on average score at the simulation end (black's
48 * perspective) since last dynkomi adjustment. */
49 struct move_stats score;
50 /* Information on average winrate of simulations since last
51 * dynkomi adjustment. */
52 struct move_stats value;
53 };
55 struct uct_dynkomi *uct_dynkomi_init_none(struct uct *u, char *arg, struct board *b); /* Constructors, one per dynkomi policy (none / linear / adaptive, going by their names); each returns a struct uct_dynkomi. NOTE(review): @arg is presumably a policy-specific option string - confirm in the implementations. */
56 struct uct_dynkomi *uct_dynkomi_init_linear(struct uct *u, char *arg, struct board *b);
57 struct uct_dynkomi *uct_dynkomi_init_adaptive(struct uct *u, char *arg, struct board *b);
59 #endif