Merge remote-tracking branch 'origin/master' into patterns
[pachi/t.git] / uct / dynkomi.h
blob7cff32fd381a089a1a8d8cb45e995587a6c698be
1 #ifndef PACHI_UCT_DYNKOMI_H
2 #define PACHI_UCT_DYNKOMI_H
4 /* Dynamic computation of artificial komi values to stabilize the MCTS. */
6 #include "move.h"
7 #include "uct/internal.h"
8 #include "uct/tree.h"
10 /* Motivation: Monte Carlo Tree Search tends to produce unstable and
11 * unreasonable results when playing in situations of extreme advantage
12 * or disadvantage, due to poor move selection because of a low
13 * signal-to-noise ratio; notably, this occurs when playing in a high
14 * handicap game, burdening the computer with further disadvantage
15 * against the strong human opponent. */
17 /* Here, we try to solve the problem by adding arbitrarily computed
18 * komi values to the score. The algorithm used is transparent to the
19 * rest of the UCT implementation. */
21 struct board; /* Forward declarations: the declarations in this header refer to these types only through pointers. */
22 struct tree;
23 struct tree_node;
24 struct uct;
25 struct uct_dynkomi;
27 /* Effective komi as seen by @color: @komi unchanged for S_BLACK, negated for any
28 * other color. A positive result means giving komi, a negative one means taking komi. */
29 #define komi_by_color(komi, color) ((color) != S_BLACK ? -(komi) : (komi))
31 /* Callback type: determine base dynamic komi for this genmove run, given the
32 * dynkomi state @d, the board @b and the search tree @tree. The returned value is
33 * stored in tree->extra_komi and by itself used just for user information. */
34 typedef floating_t (*uctd_permove)(struct uct_dynkomi *d, struct board *b, struct tree *tree);
35 /* Callback type: determine actual dynamic komi for this simulation (run on
36 * board @b from node @node of @tree). In some cases, this function will just
37 * return tree->extra_komi, in other cases it might want to adjust the komi
38 * according to the actual move depth. Returns the komi as a floating_t. */
39 typedef floating_t (*uctd_persim)(struct uct_dynkomi *d, struct board *b, struct tree *tree, struct tree_node *node);
40 /* Callback type: destroy the uct_dynkomi structure @d. Returns nothing. */
41 typedef void (*uctd_done)(struct uct_dynkomi *d);
42 /* Dynamic komi state: the callbacks implementing it plus the statistics it works with. */
43 struct uct_dynkomi {
44 struct uct *uct; /* NOTE(review): presumably the engine instance this dynkomi belongs to -- confirm. */
45 uctd_permove permove; /* Callback computing the base komi for a genmove run. */
46 uctd_persim persim; /* Callback computing the komi for a single simulation. */
47 uctd_done done; /* Callback destroying this structure. */
48 void *data; /* Opaque pointer; presumably private state of the specific dynkomi implementation -- TODO confirm. */
50 /* Game state for dynkomi use: */
51 /* Information on average score at the simulation end (black's
52 * perspective) since last dynkomi adjustment. */
53 struct move_stats score;
54 /* Information on average winrate of simulations since last
55 * dynkomi adjustment. */
56 struct move_stats value;
57 };
59 struct uct_dynkomi *uct_dynkomi_init_none(struct uct *u, char *arg, struct board *b); /* Initializers of the individual dynkomi variants; each returns a struct uct_dynkomi pointer. */
60 struct uct_dynkomi *uct_dynkomi_init_linear(struct uct *u, char *arg, struct board *b); /* NOTE(review): @arg is presumably a variant-specific option string -- confirm against the implementations. */
61 struct uct_dynkomi *uct_dynkomi_init_adaptive(struct uct *u, char *arg, struct board *b);
63 #endif