playout.h

   1 #ifndef PACHI_PLAYOUT_H
   2 #define PACHI_PLAYOUT_H
   3
   4 #define MAX_GAMELEN 600 /* Capacity of the game[] and is_ko_capture[] arrays in struct playout_amafmap. */
   5
   6 struct board;           /* Only used through pointers in this header. */
   7 struct move;            /* Only used through pointers in this header. */
   8 enum stone;             /* Passed by value in the declarations below. */
   9 struct prior_map;       /* Only used through pointers in this header. */
  10 struct board_ownermap;  /* Only used through pointers in this header. */
     /* NOTE(review): ISO C does not permit forward-declaring an enum the way
      * "enum stone;" does; presumably this relies on a compiler extension or
      * on the real definition having been included first -- confirm.
      * NOTE(review): coord_t and bool are used below, but nothing visible in
      * this header declares them; presumably includers must provide their
      * definitions beforehand -- confirm. */
  11
  12
  13 /** Playout policy interface: */
  14
  15 struct playout_policy;   /* Defined later in this header. */
  16 struct playout_setup;    /* Defined later in this header. */
  17 struct playout_amafmap;  /* Defined later in this header. */
     /* Declared up front because the callback typedefs below take pointers
      * to these structs before their full definitions appear. */
  18
  19 /* Initialize policy data structures for a new playout; subsequent choose calls
  20  * (but not assess/permit calls!) will all be made on the same board; if
  21  * setboard is used, it is guaranteed that choose will pick all moves played
  22  * on the board subsequently. The routine is expected to initialize b->ps
  23  * with internal data. At the playout end, b->ps will be simply free()d,
  24  * so make sure all data is within a single allocated block.
      *
      * playout_policy: policy object handed to the hook (presumably the struct
      *                 that holds this function pointer -- confirm with callers).
      * b:              board the playout runs on; its ps member receives the
      *                 policy's per-playout state.
      * No return value. */
  25 typedef void (*playoutp_setboard)(struct playout_policy *playout_policy, struct board *b);
  26
  27 /* Pick the next playout simulation move.
      *
      * playout_policy: policy being consulted.
      * playout_setup:  playout parameters and engine hooks (struct playout_setup).
      * amafmap:        record of the game kept for AMAF (struct playout_amafmap).
      * b:              board to pick the move on.
      * to_play:        color the move is picked for.
      * Returns the coordinate of the selected move. The postpolicy hook comment
      * in this header speaks of the policy not choosing a move; presumably that
      * is signalled by returning pass -- TODO confirm. */
  28 typedef coord_t (*playoutp_choose)(struct playout_policy *playout_policy, struct playout_setup *playout_setup, struct playout_amafmap *amafmap, struct board *b, enum stone to_play);
  29
  30 /* Set number of won (>0) or lost (<0) games for each considerable
  31  * move (usually a proportion of @games); can leave some untouched
  32  * if policy has no opinion. The number must have proper parity;
  33  * just use uct/prior.h:add_prior_value().
      *
      * playout_policy: policy being consulted.
      * map:            prior map; presumably where the per-move values are
      *                 stored -- confirm against uct/prior.h.
      * games:          game count the per-move numbers are scaled against.
      * No return value. */
  34 typedef void (*playoutp_assess)(struct playout_policy *playout_policy, struct prior_map *map, int games);
  35
  36 /* Allow play of randomly selected move.
      *
      * Asked when a randomly chosen move m is a candidate to be played on b.
      * Presumably returns true to permit the move and false to veto it --
      * TODO confirm the polarity against the caller. */
  37 typedef bool (*playoutp_permit)(struct playout_policy *playout_policy, struct board *b, struct move *m);
  38
  39 /* Tear down the policy state; policy and policy->data will be free()d by caller.
      * An implementation must therefore not free those two itself; it only
      * releases whatever else the policy owns. No return value. */
  40 typedef void (*playoutp_done)(struct playout_policy *playout_policy);
  41
  42 struct pattern_setup; /* Only referenced through the pat pointer below. */
     /* A playout policy: a set of callbacks plus the policy's private data. */
  43 struct playout_policy {
  44         int debug_level; /* Presumably the verbosity of debugging output -- confirm. */
  45         /* We call setboard when we start new playout.
  46          * We call choose when we ask policy about next move.
  47          * We call assess when we ask policy about how good given move is.
  48          * We call permit when we ask policy if we can make a randomly chosen move.
                 * We call done when the policy state is to be torn down.
                 * NOTE(review): whether any of these may be left NULL is not
                 * stated in this header -- confirm in the callers. */
  49         playoutp_setboard setboard;
  50         playoutp_choose choose;
  51         playoutp_assess assess;
  52         playoutp_permit permit;
  53         playoutp_done done;
  54         /* Particular playout policy's internal data. */
  55         void *data;
  56         /* Gateway for pattern database.
                 * Presumably want_pat asks for pat to be provided -- confirm
                 * who sets each of the two fields. */
  57         bool want_pat;
  58         struct pattern_setup *pat;
  59 };
  60
  61
  62 /** Playout engine interface: */
  63
  64 /* Engine hook for forcing moves before doing policy decision.
  65  * Return pass to forward to policy.
      * Any other returned coordinate is presumably used as the forced move,
      * and color is presumably the side to move -- TODO confirm. */
  66 typedef coord_t (*playouth_prepolicy)(struct playout_policy *playout_policy, struct playout_setup *setup, struct board *b, enum stone color);
  67
  68 /* Engine hook for choosing moves in case policy did not choose
  69  * a move.
  70  * Return pass to forward to uniformly random selection.
      * Any other returned coordinate is presumably used as the move, and
      * color is presumably the side to move -- TODO confirm. */
  71 typedef coord_t (*playouth_postpolicy)(struct playout_policy *playout_policy, struct playout_setup *setup, struct board *b, enum stone color);
  72
  73 struct playout_setup { /* Playout parameters and optional engine hooks. */
  74         unsigned int gamelen; /* Maximal # of moves in playout. */
             /* NOTE(review): this gamelen is unsigned while the gamelen member
              * of struct playout_amafmap is a plain int -- watch for
              * mixed-sign comparisons between the two. */
  75         /* Minimal difference between captures to terminate the playout.
  76          * 0 means don't check. */
  77         int mercymin;
  78
  79         void *hook_data; // for hook to reference its state
  80         playouth_prepolicy prepolicy_hook;   /* Can force a move before the policy is asked. */
  81         playouth_postpolicy postpolicy_hook; /* Can choose a move when the policy did not. */
  82 };
  83
  84
  85 struct playout_amafmap {
  86         /* We keep record of the game so that we can
  87          * examine nakade moves; really going out of our way to
  88          * implement nakade AMAF properly turns out to be crucial
  89          * when reading some tactical positions in depth (even if
  90          * they are just one-stone-snapback). */
  91         coord_t game[MAX_GAMELEN];       /* The recorded game; holds at most MAX_GAMELEN moves. */
  92         bool is_ko_capture[MAX_GAMELEN]; /* Presumably parallel to game[]: whether that move was a ko capture -- confirm. */
  93         int gamelen;                     /* Presumably the number of entries of game[] in use -- confirm. */
  94         /* Our current position in the game sequence; in AMAF, we search
  95          * the range [game_baselen, gamelen), i.e. game_baselen is included
              * and gamelen is excluded. */
  96         int game_baselen;
  97 };
  98
  99
 100 /* >0: starting_color wins, <0: starting_color loses; the actual
 101  * number is a DOUBLE of the score difference (i.e. twice the difference;
      * presumably so that half-point results fit in an int -- confirm)
 102  * 0: superko inside the game tree (XXX: jigo not handled)
      *
      * setup:          playout parameters and engine hooks.
      * b:              board the game is played on.
      * starting_color: color whose point of view the result is reported from.
      * amafmap:        game record kept for AMAF.
      * ownermap:       NOTE(review): its use is not described in this header.
      * policy:         playout policy supplying the callbacks.
      * NOTE(review): which of the pointer arguments may be NULL is not stated
      * here -- confirm against the implementation. */
 103 int play_random_game(struct playout_setup *setup,
 104                      struct board *b, enum stone starting_color,
 105                      struct playout_amafmap *amafmap,
 106                      struct board_ownermap *ownermap,
 107                      struct playout_policy *policy);
 108
     /* NOTE(review): this prototype has no documentation in the header.
      * Judging by the name and signature it presumably plays one playout move
      * for color on b using policy (and the hooks in setup) and returns the
      * coordinate played -- confirm the exact contract, including what is
      * returned when no move is available, against the implementation. */
 109 coord_t play_random_move(struct playout_setup *setup, struct playout_amafmap *amafmap,
 110                          struct board *b, enum stone color,
 111                          struct playout_policy *policy);
 112
 113 #endif