playout.h

   1 #ifndef ZZGO_PLAYOUT_H
   2 #define ZZGO_PLAYOUT_H
   3
   4 #define MAX_GAMELEN 600 /* Bound on recorded game length; playout_amafmap.game holds MAX_GAMELEN + 1 moves. */
   5
   6 struct board; /* Types below are only declared here; their definitions live elsewhere. */
   7 struct move; /* NOTE(review): playout_amafmap embeds an array of struct move, which needs the complete type - confirm the defining header is always included first. */
   8 enum stone; /* NOTE(review): ISO C does not allow declaring an enum before its definition (compilers may accept it as an extension) - confirm include order. */
   9 struct prior_map;
  10 struct board_ownermap;
  11
  12
  13 /** Playout policy interface: */
  14
  15 struct playout_policy;
  16 /* Initialize policy data structures for a new playout. Subsequent choose
  17  * calls (but not assess/permit calls!) will all be made on the same board;
  18  * if setboard is used, it is guaranteed that choose will pick all moves
  19  * played on the board subsequently. The routine is expected to initialize
  20  * b->ps with internal data. At the end of the playout, b->ps is simply
  21  * free()d, so keep all of that data within a single allocated block. */
  22 typedef void (*playoutp_setboard)(struct playout_policy *playout_policy, struct board *b);
  23 /* Pick the next playout simulation move. NOTE(review): coord_t is not declared in this header - presumably provided by an earlier include; confirm. */
  24 typedef coord_t (*playoutp_choose)(struct playout_policy *playout_policy, struct board *b, enum stone to_play);
  25 /* Set the number of won (>0) or lost (<0) games for each move worth
  26  * considering (usually a proportion of @games); moves may be left
  27  * untouched if the policy has no opinion on them. The number must have
  28  * proper parity; just use uct/prior.h:add_prior_value(). */
  29 typedef void (*playoutp_assess)(struct playout_policy *playout_policy, struct prior_map *map, int games);
  30 /* Ask whether a randomly selected move may be played (presumably true = allowed; confirm against implementations). NOTE(review): bool is not declared here - needs <stdbool.h> or equivalent from an earlier include. */
  31 typedef bool (*playoutp_permit)(struct playout_policy *playout_policy, struct board *b, struct move *m);
  32 /* Tear down the policy state; the caller will free() both policy and policy->data itself. */
  33 typedef void (*playoutp_done)(struct playout_policy *playout_policy);
  34
  35 struct playout_policy { /* A playout policy instance: callback hooks plus policy-private data. */
  36         int debug_level;
  37         /* Hooks: setboard is called when we start a new playout,
  38          * choose when we ask the policy about the next move,
  39          * assess when we ask the policy how good a given move is,
  40          * permit when we ask whether a randomly chosen move may be made. */
  41         playoutp_setboard setboard;
  42         playoutp_choose choose;
  43         playoutp_assess assess;
  44         playoutp_permit permit;
  45         playoutp_done done; /* Called to tear down the policy state. */
  46         /* Particular playout policy's internal data. */
  47         void *data;
  48 };
  49
  50
  51 /** Playout engine interface: */
  52
  53 struct playout_setup { /* Playout parameters; passed to play_random_game() as its setup argument. */
  54         int gamelen; /* Maximum number of moves in a playout. */
  55         /* Minimum difference between captures at which the playout is
  56          * terminated; 0 disables the check. */
  57         int mercymin;
  58
  59         /* XXX: We used to have more fields here; perhaps we will have
  60          * more again in the future. */
  61 };
  62
  63
  64 struct playout_amafmap {
  65         /* Record of the random playout - for each intersection:
  66          * S_NONE: This move was never played
  67          * S_BLACK: This move was played by black first
  68          * S_WHITE: This move was played by white first
  69          */
  70         enum stone *map; // [board_size2(b)]
  71
  72         /* Map items are packed: the low bits (& 0xf) hold the enum stone, bits 8 and up hold the nakade counter.
  73          * In case of nakade, we record only the color of the first stone played inside, but count further
  74          * throwins and ignore AMAF value after these. amaf_op(item_, op_) rewrites the counter as (counter op_ 1).
  75          * Macro parameters are parenthesized so expression arguments expand safely; item_ is expanded twice in amaf_op, so pass a side-effect-free lvalue. */
  76 #define amaf_nakade(item_) ((item_) >> 8)
  77 #define amaf_op(item_, op_) do { \
  78                 int mi_ = (item_); \
  79                 (item_) = (mi_ & 0xf) | ((amaf_nakade(mi_) op_ 1) << 8); \
  80 } while (0)
  81
  82         /* We also keep a record of the game so that nakade moves can
  83          * be examined; going out of our way to implement nakade AMAF
  84          * properly turns out to be crucial when reading some tactical
  85          * positions in depth (even if they are just a one-stone
  86          * snapback). */
  87         struct move game[MAX_GAMELEN + 1];
  88         int gamelen; /* presumably the number of moves recorded in game[] - confirm against the playout code */
  89         /* Our current position in the game sequence; in AMAF, we search
  90          * the range [game_baselen, gamelen]. */
  91         int game_baselen;
  92
  93         /* Whether to record the nakade moves (true) or to ignore them
  94          * completely (false: only the first color on the intersection
  95          * is stored in the map and the nakade counter is not
  96          * incremented; the game record is still kept). */
  97         bool record_nakade;
  98 };
  99
 100
 101 /* Returns >0 if starting_color wins, <0 if starting_color loses; the
 102  * magnitude is DOUBLE (i.e. twice) the score difference.
 103  * Returns 0 on superko inside the game tree (XXX: jigo not handled). */
 104 int play_random_game(struct playout_setup *setup,
 105                      struct board *b, enum stone starting_color,
 106                      struct playout_amafmap *amafmap,
 107                      struct board_ownermap *ownermap,
 108                      struct playout_policy *policy);
 109
 110 #endif