/* Upper bound on the length of a recorded game sequence; struct
 * playout_amafmap sizes its game[] array as MAX_GAMELEN + 1 entries. */
#define MAX_GAMELEN 600

/* Only used through pointers in this header, so a forward declaration
 * is sufficient. */
struct board_ownermap;

/** Playout policy interface: */

struct playout_policy;
/* Initialize policy data structures for a new playout.
 *
 * Subsequent choose calls (but not assess/permit calls!) will all be made
 * on the same board; if setboard is used, it is guaranteed that choose
 * will pick all moves played on the board subsequently.
 *
 * The routine is expected to initialize b->ps with internal data. At the
 * playout end, b->ps will be simply free()d, so make sure all data is
 * within a single allocated block. */
typedef void (*playoutp_setboard)(struct playout_policy *playout_policy,
				  struct board *b);
26 /* Pick the next playout simulation move. */
27 typedef coord_t (*playoutp_choose
)(struct playout_policy
*playout_policy
, struct playout_setup
*playout_setup
, struct board
*b
, enum stone to_play
);
/* Set number of won (>0) or lost (<0) games for each considerable
 * move (usually a proportion of @games); can leave some untouched
 * if policy has no opinion. The number must have proper parity;
 * just use uct/prior.h:add_prior_value(). */
typedef void (*playoutp_assess)(struct playout_policy *playout_policy,
				struct prior_map *map,
				int games);
/* Allow play of randomly selected move.
 * Called to ask the policy whether the randomly chosen move @m may be
 * made on board @b; the answer is returned as a bool. */
typedef bool (*playoutp_permit)(struct playout_policy *playout_policy,
				struct board *b,
				struct move *m);
/* Tear down the policy state.
 * The policy itself and policy->data will be free()d by the caller, so
 * the hook must not free them. */
typedef void (*playoutp_done)(struct playout_policy *playout_policy);
41 struct playout_policy
{
43 /* We call setboard when we start new playout.
44 * We call choose when we ask policy about next move.
45 * We call assess when we ask policy about how good given move is.
46 * We call permit when we ask policy if we can make a randomly chosen move. */
47 playoutp_setboard setboard
;
48 playoutp_choose choose
;
49 playoutp_assess assess
;
50 playoutp_permit permit
;
52 /* Particular playout policy's internal data. */
57 /** Playout engine interface: */
59 /* Engine hook for forcing moves before doing policy decision.
60 * Return pass to forward to policy. */
61 typedef coord_t (*playouth_prepolicy
)(struct playout_policy
*playout_policy
, struct playout_setup
*setup
, struct board
*b
, enum stone color
);
63 /* Engine hook for choosing moves in case policy did not choose
65 * Return pass to forward to uniformly random selection. */
66 typedef coord_t (*playouth_postpolicy
)(struct playout_policy
*playout_policy
, struct playout_setup
*setup
, struct board
*b
, enum stone color
);
68 struct playout_setup
{
69 unsigned int gamelen
; /* Maximal # of moves in playout. */
70 /* Minimal difference between captures to terminate the playout.
71 * 0 means don't check. */
72 unsigned int mercymin
;
74 void *hook_data
; // for hook to reference its state
75 playouth_prepolicy prepolicy_hook
;
76 playouth_postpolicy postpolicy_hook
;
80 struct playout_amafmap
{
81 /* Record of the random playout - for each intersection:
82 * S_NONE: This move was never played
83 * S_BLACK: This move was played by black first
84 * S_WHITE: This move was played by white first
86 enum stone
*map
; // [board_size2(b)]
88 /* the lowest &0xf is the enum stone, upper bits are nakade
89 * counter - in case of nakade, we record only color of the
90 * first stone played inside, but count further throwins
91 * and ignore AMAF value after these. */
92 #define amaf_nakade(item_) (item_ >> 8)
93 #define amaf_op(item_, op_) do { \
95 item_ = (mi_ & 0xf) | ((amaf_nakade(mi_) op_ 1) << 8); \
98 /* Additionally, we keep record of the game so that we can
99 * examine nakade moves; really going out of our way to
100 * implement nakade AMAF properly turns out to be crucial
101 * when reading some tactical positions in depth (even if
102 * they are just one-stone-snapback). */
103 struct move game
[MAX_GAMELEN
+ 1];
104 unsigned int gamelen
;
105 /* Our current position in the game sequence; in AMAF, we search
106 * the range [game_baselen, gamelen]. */
107 unsigned int game_baselen
;
109 /* Whether to record the nakade moves (true) or just completely
110 * ignore them (false; just the first color on the intersection
111 * is stored in the map, nakade counter is not incremented; game
112 * record is still kept). */
/* Play one random game on board @b, with @starting_color moving first.
 *
 * Return value:
 * >0: starting_color wins, <0: starting_color loses; the actual
 *     number is a DOUBLE of the score difference
 *  0: superko inside the game tree (XXX: jigo not handled) */
int play_random_game(struct playout_setup *setup,
		     struct board *b, enum stone starting_color,
		     struct playout_amafmap *amafmap,
		     struct board_ownermap *ownermap,
		     struct playout_policy *policy);
126 coord_t
play_random_move(struct playout_setup
*setup
,
127 struct board
*b
, enum stone color
,
128 struct playout_policy
*policy
);