uct/internal.h

   1 #ifndef ZZGO_UCT_INTERNAL_H
   2 #define ZZGO_UCT_INTERNAL_H
   3
   4 #include <signal.h> // sig_atomic_t
   5
   6 #include "debug.h"
   7 #include "move.h"
   8 #include "ownermap.h"
   9 #include "playout.h"
  10 #include "stats.h"
  11
  12 struct tree;
  13 struct tree_node;
  14 struct uct_policy;
  15 struct uct_prior;
  16
  17 /* Internal UCT structures */
  18
  19
  20 /* Internal engine state: scalar settings, threading configuration, policy pointers, the ownermap and the current game tree. */
  21 struct uct {
  22         int debug_level; /* Read by the UDEBUGL() macro through u->debug_level. */
  23         int games, gamelen;
  24         float resign_ratio;
  25         float loss_threshold;
  26         double best2_ratio, bestr_ratio;
  27         bool pass_all_alive; /* NOTE(review): uct_pass_is_safe() also takes a pass_all_alive argument; relation to this member not visible here. */
  28         bool territory_scoring;
  29         int expand_p;
  30         bool playout_amaf, playout_amaf_nakade;
  31         bool amaf_prior;
  32         int playout_amaf_cutoff;
  33         int dumpthres;
  34         int force_seed;
  35         bool no_book;
  36         bool fast_alloc;
  37         unsigned long max_tree_size; /* Unit (bytes vs. nodes) is not visible in this header - TODO confirm. */
  38         int mercymin;
  39
  40         int threads;
  41         enum uct_thread_model { /* Declared inline; in C the tag and the TM_* constants are still visible at file scope. */
  42                 TM_ROOT, /* Root parallelization. */
  43                 TM_TREE, /* Tree parallelization w/o virtual loss. */
  44                 TM_TREEVL, /* Tree parallelization with virtual loss. */
  45         } thread_model;
  46         bool parallel_tree; /* Presumably derived from thread_model (TM_TREE / TM_TREEVL) - confirm where it is set. */
  47         bool virtual_loss; /* Presumably set for TM_TREEVL - confirm where it is set. */
  48         bool pondering_opt; /* User wants pondering */
  49         bool pondering; /* Actually pondering now */
  50
  51         int fuseki_end; /* Presumably marks the end of the opening (fuseki) phase; unit not visible here - TODO confirm. */
  52         int yose_start; /* Presumably marks the start of the endgame (yose) phase; unit not visible here - TODO confirm. */
  53
  54         int dynkomi;
  55         int dynkomi_mask;
  56         int handicap_value;
  57
  58         float val_scale;
  59         int val_points;
  60         bool val_extra;
  61
  62         int random_policy_chance; /* Presumably controls how often random_policy is used instead of policy - confirm. */
  63         int local_tree;
  64         int tenuki_d;
  65         float local_tree_aging;
  66         bool local_tree_allseq;
  67
  68         char *banner; /* NOTE(review): ownership and lifetime of this string are not visible in this header. */
  69
  70         struct uct_policy *policy; /* struct uct_policy is defined further down in this header. */
  71         struct uct_policy *random_policy;
  72         struct playout_policy *playout; /* Type is not declared in this header; presumably comes from playout.h - confirm. */
  73         struct uct_prior *prior; /* Only forward-declared in this header; opaque here. */
  74
  75         /* Used within frame of single genmove. */
  76         struct board_ownermap ownermap; /* Embedded by value, so the complete type is required (presumably from ownermap.h). */
  77
  78         /* Game state - maintained by setup_state(), reset_state(). */
  79         struct tree *t; /* struct tree is only forward-declared in this header. */
  80 };
  81
  82 #define UDEBUGL(n) DEBUGL_(u->debug_level, n) /* Needs a pointer named u with a debug_level member in scope at the expansion site; DEBUGL_ is not defined in this header (presumably debug.h). */
  83
  84 extern volatile sig_atomic_t uct_halt; /* Declaration only; sig_atomic_t comes from <signal.h>. Presumably a stop request flag - confirm at the definition. */
  85 extern __thread int thread_id; /* Declaration only; __thread is the GCC/Clang thread-local storage extension, so each thread has its own copy. */
  86
  87 bool uct_pass_is_safe(struct uct *u, struct board *b, enum stone color, bool pass_all_alive); /* Prototype only; the definition lives outside this header. pass_all_alive is an explicit argument, separate from the struct uct member of the same name. */
  88
  89
  90 /* This is the state used for descending the tree; we use this wrapper
  91  * structure in order to be able to easily descend in multiple trees
  92  * in parallel (e.g. main tree and local tree) or compute cumulative
  93  * "path value" throughout the tree descent. */
  94 struct uct_descent {
  95         /* Active tree nodes: */
  96         struct tree_node *node; /* Main tree. */
  97         struct tree_node *lnode; /* Local tree. */
  98         /* Value of main tree node (with all value factors, but unbiased
  99          * - without exploration factor), from black's perspective. */
 100         struct move_stats value; /* Embedded by value; struct move_stats presumably comes from stats.h - confirm. */
 101 };
 102
 103
 104 typedef struct tree_node *(*uctp_choose)(struct uct_policy *p, struct tree_node *node, struct board *b, enum stone color, coord_t exclude); /* Type of uct_policy.choose; returns a tree node pointer. */
 105 typedef float (*uctp_evaluate)(struct uct_policy *p, struct tree *tree, struct uct_descent *descent, int parity); /* Type of uct_policy.evaluate; returns a float. */
 106 typedef void (*uctp_descend)(struct uct_policy *p, struct tree *tree, struct uct_descent *descent, int parity, bool allow_pass); /* Type of uct_policy.descend; no return value, takes the descent state by pointer. */
 107 typedef void (*uctp_winner)(struct uct_policy *p, struct tree *tree, struct uct_descent *descent); /* Type of uct_policy.winner; no return value, takes the descent state by pointer. */
 108 typedef void (*uctp_prior)(struct uct_policy *p, struct tree *tree, struct tree_node *node, struct board *b, enum stone color, int parity); /* Type of uct_policy.prior. */
 109 typedef void (*uctp_update)(struct uct_policy *p, struct tree *tree, struct tree_node *node, enum stone node_color, enum stone player_color, struct playout_amafmap *amaf, float result); /* Type of uct_policy.update. */
 110
 111 struct uct_policy { /* A tree policy: a set of function-pointer hooks plus a pointer to the engine state and an opaque data pointer. */
 112         struct uct *uct; /* Pointer to the engine state; presumably the engine this policy belongs to - confirm. */
 113         uctp_choose choose;
 114         uctp_winner winner;
 115         uctp_evaluate evaluate;
 116         uctp_descend descend;
 117         uctp_update update;
 118         uctp_prior prior;
 119         bool wants_amaf; /* Presumably tells the engine this policy needs AMAF data in update() - confirm. */
 120         void *data; /* Untyped pointer; presumably policy-private state - ownership not visible here. */
 121 };
 122
 123 #endif