1 #ifndef ZZGO_UCT_INTERNAL_H
2 #define ZZGO_UCT_INTERNAL_H
4 #include <signal.h> // sig_atomic_t
12 #include "distributed/distributed.h"
20 /* Internal UCT structures */
/* Polling period, in seconds, at which the main thread looks at the
 * tree to check for playout stop, emit progress reports, etc.
 * The value 0.1 corresponds to 100ms. */
#define TREE_BUSYWAIT_INTERVAL 0.1
26 /* Distributed stats for each child of the root node. */
28 struct move_stats2 last_sent_own
;
29 struct move_stats2 added_from_others
;
30 struct tree_node
*node
;
33 /* Internal engine state. */
39 double best2_ratio
, bestr_ratio
;
41 bool territory_scoring
;
43 bool playout_amaf
, playout_amaf_nakade
;
45 int playout_amaf_cutoff
;
50 unsigned long max_tree_size
;
54 enum uct_thread_model
{
55 TM_ROOT
, /* Root parallelization. */
56 TM_TREE
, /* Tree parallelization w/o virtual loss. */
57 TM_TREEVL
, /* Tree parallelization with virtual loss. */
61 bool pondering_opt
; /* User wants pondering */
62 bool pondering
; /* Actually pondering now */
63 bool slave
; /* Act as slave in distributed engine. */
71 struct uct_dynkomi
*dynkomi
;
77 int random_policy_chance
;
80 float local_tree_aging
;
81 bool local_tree_allseq
;
82 /* Playout-localtree integration. */
83 bool local_tree_playout
; // can be true only if ELO playout
84 bool local_tree_pseqroot
;
88 struct uct_policy
*policy
;
89 struct uct_policy
*random_policy
;
90 struct playout_policy
*playout
;
91 struct uct_prior
*prior
;
93 /* Used within frame of single genmove. */
94 struct board_ownermap ownermap
;
95 /* Used for coordination among slaves of the distributed engine. */
96 struct node_stats
*stats
;
98 int played_all
; /* games played by all slaves */
100 /* Game state - maintained by setup_state(), reset_state(). */
/* Shorthand for DEBUGL_() that supplies u->debug_level as the first
 * argument. The expansion refers to `u` directly, so an identifier `u`
 * with a `debug_level` member reachable via `->` must be in scope
 * wherever this macro is used. */
#define UDEBUGL(lvl) DEBUGL_(u->debug_level, lvl)
/* Globals defined elsewhere; only their declarations live here. */

/* Declared volatile sig_atomic_t. Presumably the "stop searching"
 * flag -- confirm against the defining translation unit. */
extern volatile sig_atomic_t uct_halt;

/* Thread-local (__thread storage class): each thread has its own copy. */
extern __thread int thread_id;

/* NOTE(review): name suggests it tracks whether the thread manager is
 * currently active; semantics are not visible from this header. */
extern bool thread_manager_running;
/* Returns a bool for engine `u`, board `b` and player `color`.
 * NOTE(review): judging by the name this answers "is passing safe
 * here?", with pass_all_alive selecting how stones are treated --
 * confirm against the definition. */
bool uct_pass_is_safe(struct uct *u,
                      struct board *b,
                      enum stone color,
                      bool pass_all_alive);
/* Takes the engine state, a board and the colour to move; returns
 * nothing. NOTE(review): presumably called before generating a move
 * for `color` -- the exact preparation done is defined elsewhere. */
void uct_prepare_move(struct uct *u,
                      struct board *b,
                      enum stone color);
/* Same parameter list as uct_prepare_move(); returns nothing.
 * NOTE(review): presumably sets up a search for `color` on `b` --
 * confirm against the definition. */
void uct_search_setup(struct uct *u,
                      struct board *b,
                      enum stone color);
115 /* Progress information of the on-going MCTS search - when we
116 * last adjusted dynkomi, printed out stuff, etc. */
118 struct uct_search_state
{
119 /* Number of games simulated for this simulation before
120 * we started the search. (We have simulated them earlier.) */
122 /* Number of last dynkomi adjustment. */
124 /* Number of last game with progress print. */
126 /* Number of simulations to wait before next print. */
128 /* Printed notification about full memory? */
131 struct time_stop stop
;
132 struct spawn_ctx
*ctx
;
/* Yields an int derived from search state `s`.
 * NOTE(review): presumably a count of simulated games -- confirm
 * against the definition. */
int uct_search_games(struct uct_search_state *s);
/* First of the uct_search_*() calls sharing the (u, b, color, t, ti, s)
 * parameter list. NOTE(review): presumably begins a search and
 * initialises `s`; ownership and side effects are not visible here. */
void uct_search_start(struct uct *u,
                      struct board *b,
                      enum stone color,
                      struct tree *t,
                      struct time_info *ti,
                      struct uct_search_state *s);
/* Same arguments as uct_search_start() plus an int `i`.
 * NOTE(review): `i` is presumably the current game count; the function
 * likely performs periodic progress work -- confirm in the definition. */
void uct_search_progress(struct uct *u,
                         struct board *b,
                         enum stone color,
                         struct tree *t,
                         struct time_info *ti,
                         struct uct_search_state *s,
                         int i);
/* Same arguments as uct_search_progress(), but returns a bool.
 * NOTE(review): presumably true means the search should stop -- the
 * polarity is not visible from this declaration. */
bool uct_search_check_stop(struct uct *u,
                           struct board *b,
                           enum stone color,
                           struct tree *t,
                           struct time_info *ti,
                           struct uct_search_state *s,
                           int i);
138 struct tree_node
*uct_search_best(struct uct
*u
, struct board
*b
, enum stone color
, bool pass_all_alive
, int played_games
, int base_playouts
, coord_t
*best_coord
);
141 /* This is the state used for descending the tree; we use this wrapper
142 * structure in order to be able to easily descend in multiple trees
143 * in parallel (e.g. main tree and local tree) or compute cumulative
144 * "path value" throughout the tree descent. */
146 /* Active tree nodes: */
147 struct tree_node
*node
; /* Main tree. */
148 struct tree_node
*lnode
; /* Local tree. */
149 /* Value of main tree node (with all value factors, but unbiased
150 * - without exploration factor), from black's perspective. */
151 struct move_stats value
;
155 typedef struct tree_node
*(*uctp_choose
)(struct uct_policy
*p
, struct tree_node
*node
, struct board
*b
, enum stone color
, coord_t exclude
);
/* Policy callback type returning a float for a descent state within
 * `tree`. NOTE(review): meaning of `parity` and of the returned value
 * is defined by the implementing policy. */
typedef float (*uctp_evaluate)(
	struct uct_policy *p,
	struct tree *tree,
	struct uct_descent *descent,
	int parity);
/* Policy callback type with no return value; receives the descent
 * state by pointer. NOTE(review): presumably advances `descent` one
 * step down the tree, with allow_pass gating pass moves -- confirm
 * with an implementing policy. */
typedef void (*uctp_descend)(
	struct uct_policy *p,
	struct tree *tree,
	struct uct_descent *descent,
	int parity,
	bool allow_pass);
/* Policy callback type with no return value, taking the policy, the
 * tree and a descent state. NOTE(review): result is presumably
 * delivered through `descent` -- confirm with an implementing policy. */
typedef void (*uctp_winner)(
	struct uct_policy *p,
	struct tree *tree,
	struct uct_descent *descent);
/* Policy callback type with no return value, operating on a tree node
 * in the context of board `b` and player `color`.
 * NOTE(review): presumably applies prior values to `node` -- confirm
 * with an implementing policy. */
typedef void (*uctp_prior)(
	struct uct_policy *p,
	struct tree *tree,
	struct tree_node *node,
	struct board *b,
	enum stone color,
	int parity);
/* Policy callback type with no return value. Receives a tree node,
 * two colours (node_color, player_color), an AMAF map pointer and a
 * float `result`. NOTE(review): presumably folds a playout result into
 * the node statistics -- confirm with an implementing policy. */
typedef void (*uctp_update)(
	struct uct_policy *p,
	struct tree *tree,
	struct tree_node *node,
	enum stone node_color,
	enum stone player_color,
	struct playout_amafmap *amaf,
	float result);
166 uctp_evaluate evaluate
;
167 uctp_descend descend
;