uct/dynkomi.h

   1 #ifndef PACHI_UCT_DYNKOMI_H
   2 #define PACHI_UCT_DYNKOMI_H
   3
   4 /* Dynamic computation of artificial komi values to stabilize the MCTS. */
   5
   6 #include "move.h"
   7 #include "uct/internal.h"
   8 #include "uct/tree.h"
   9
  10 /* Motivation: Monte Carlo Tree Search tends to produce unstable and
  11  * unreasonable results when playing in situations of extreme advantage
  12  * or disadvantage, due to poor move selection because of low
  13  * signal-to-noise ratio; notably, this occurs when playing in high
  14  * handicap games, burdening the computer with further disadvantage
  15  * against the strong human opponent. */
  16
  17 /* Here, we try to solve the problem by adding arbitrarily computed
  18  * komi values to the score. The algorithm used is transparent to the
  19  * rest of the UCT implementation. */
  20
  21 struct board; /* Forward declarations: this header uses these types only via pointers. */
  22 struct tree;
  23 struct tree_node;
  24 struct uct;
  25 struct uct_dynkomi;
  26
  27 /* Effective komi for @color: @komi itself for S_BLACK, -@komi for any other
  28  * color. Positive value means giving komi, negative means taking komi. */
  29 #define komi_by_color(komi, color) ((color) == S_BLACK ? (komi) : -(komi))
  30
  31 /* Per-move hook: determine the base dynamic komi for this genmove run
  32  * (dynkomi state @d, board @b, tree @tree). The returned value is stored
  33  * in tree->extra_komi and is by itself used just for user information. */
  34 typedef floating_t (*uctd_permove)(struct uct_dynkomi *d, struct board *b, struct tree *tree);
  35 /* Per-simulation hook: determine the actual dynamic komi for one
  36  * simulation (run on board @b from node @node). An implementation may
  37  * simply return tree->extra_komi, or it may adjust the komi according
  38  * to the actual move depth. */
  39 typedef floating_t (*uctd_persim)(struct uct_dynkomi *d, struct board *b, struct tree *tree, struct tree_node *node);
  40 /* Destructor hook: destroy the uct_dynkomi structure @d. */
  41 typedef void (*uctd_done)(struct uct_dynkomi *d);
  42
  43 struct uct_dynkomi { /* One dynamic komi method: its hooks plus running statistics. */
  44         struct uct *uct; /* presumably the engine this belongs to -- TODO confirm */
  45         uctd_permove permove; /* Hook giving the base komi for a genmove run. */
  46         uctd_persim persim; /* Hook giving the komi for a single simulation. */
  47         uctd_done done; /* Hook destroying this structure. */
  48         void *data; /* presumably method-private state -- confirm against users */
  49
  50         /* Game state for dynkomi use: */
  51         /* Information on average score at the simulation end (black's
  52          * perspective) since the last dynkomi adjustment. */
  53         struct move_stats score;
  54         /* Information on average winrate of simulations since the last
  55          * dynkomi adjustment. */
  56         struct move_stats value;
  57 };
  58 /* Initializers for the none/linear/adaptive dynkomi variants; @arg presumably carries options (confirm). */
  59 struct uct_dynkomi *uct_dynkomi_init_none(struct uct *u, char *arg, struct board *b);
  60 struct uct_dynkomi *uct_dynkomi_init_linear(struct uct *u, char *arg, struct board *b);
  61 struct uct_dynkomi *uct_dynkomi_init_adaptive(struct uct *u, char *arg, struct board *b);
  62
  63 #endif