playout.c

   1 #define DEBUG
   2 #include <assert.h>
   3 #include <math.h>
   4 #include <stdio.h>
   5 #include <stdlib.h>
   6 #include <string.h>
   7
   8 #include "board.h"
   9 #include "debug.h"
  10 #include "engine.h"
  11 #include "move.h"
  12 #include "ownermap.h"
  13 #include "playout.h"
  14
  15 /* Whether to set global debug level to the same as the playout
  16  * has, in case it is different. This can make sure e.g. tactical
  17  * reading produces proper level of debug prints during simulations.
  18  * But it is safe to enable this only in single-threaded instances! */
  19 //#define DEBUGL_BY_PLAYOUT
  20
  21 #define PLDEBUGL(n) DEBUGL_(policy->debug_level, n)
  22
  23
  24 int
  25 play_random_game(struct playout_setup *setup,
  26                  struct board *b, enum stone starting_color,
  27                  struct playout_amafmap *amafmap,
  28                  struct board_ownermap *ownermap,
  29                  struct playout_policy *policy)
  30 {
  31         assert(setup && policy);
  32
  33         int gamelen = setup->gamelen - b->moves;
  34         if (gamelen < 10)
  35                 gamelen = 10;
  36
  37         if (policy->setboard)
  38                 policy->setboard(policy, b);
  39 #ifdef DEBUGL_BY_PLAYOUT
  40         int debug_level_orig = debug_level;
  41         debug_level = policy->debug_level;
  42 #endif
  43
  44         enum stone color = starting_color;
  45
  46         int passes = is_pass(b->last_move.coord) && b->moves > 0;
  47
  48         while (gamelen-- && passes < 2) {
  49                 coord_t coord = pass;
  50
  51                 if (setup->prepolicy_hook) {
  52                         coord = setup->prepolicy_hook(policy, setup, b, color);
  53                         // fprintf(stderr, "prehook: %s\n", coord2sstr(coord, b));
  54                 }
  55
  56                 if (is_pass(coord)) {
  57                         coord = policy->choose(policy, setup, b, color);
  58                         // fprintf(stderr, "policy: %s\n", coord2sstr(coord, b));
  59                 }
  60
  61                 if (is_pass(coord) && setup->postpolicy_hook) {
  62                         coord = setup->postpolicy_hook(policy, setup, b, color);
  63                         // fprintf(stderr, "posthook: %s\n", coord2sstr(coord, b));
  64                 }
  65
  66                 if (is_pass(coord)) {
  67 play_random:
  68                         /* Defer to uniformly random move choice. */
  69                         /* This must never happen if the policy is tracking
  70                          * internal board state, obviously. */
  71                         assert(!policy->setboard);
  72                         board_play_random(b, color, &coord, (ppr_permit) policy->permit, policy);
  73
  74                 } else {
  75                         struct move m;
  76                         m.coord = coord; m.color = color;
  77                         if (board_play(b, &m) < 0) {
  78                                 if (PLDEBUGL(4)) {
  79                                         fprintf(stderr, "Pre-picked move %d,%d is ILLEGAL:\n",
  80                                                 coord_x(coord, b), coord_y(coord, b));
  81                                         board_print(b, stderr);
  82                                 }
  83                                 goto play_random;
  84                         }
  85                 }
  86
  87 #if 0
  88                 /* For UCT, superko test here is downright harmful since
  89                  * in superko-likely situation we throw away literally
  90                  * 95% of our playouts; UCT will deal with this fine by
  91                  * itself. */
  92                 if (unlikely(b->superko_violation)) {
  93                         /* We ignore superko violations that are suicides. These
  94                          * are common only at the end of the game and are
  95                          * rather harmless. (They will not go through as a root
  96                          * move anyway.) */
  97                         if (group_at(b, coord)) {
  98                                 if (DEBUGL(3)) {
  99                                         fprintf(stderr, "Superko fun at %d,%d in\n", coord_x(coord, b), coord_y(coord, b));
 100                                         if (DEBUGL(4))
 101                                                 board_print(b, stderr);
 102                                 }
 103                                 return 0;
 104                         } else {
 105                                 if (DEBUGL(6)) {
 106                                         fprintf(stderr, "Ignoring superko at %d,%d in\n", coord_x(coord, b), coord_y(coord, b));
 107                                         board_print(b, stderr);
 108                                 }
 109                                 b->superko_violation = false;
 110                         }
 111                 }
 112 #endif
 113
 114                 if (PLDEBUGL(7)) {
 115                         fprintf(stderr, "%s %s\n", stone2str(color), coord2sstr(coord, b));
 116                         if (PLDEBUGL(8))
 117                                 board_print(b, stderr);
 118                 }
 119
 120                 if (unlikely(is_pass(coord))) {
 121                         passes++;
 122                 } else {
 123                         /* We don't care about nakade counters, since we want
 124                          * to avoid taking pre-nakade moves into account only
 125                          * if they happenned in the tree before nakade nodes;
 126                          * but this is always out of the tree. */
 127                         if (amafmap) {
 128                                 if (amafmap->map[coord] == S_NONE || amafmap->map[coord] == color)
 129                                         amafmap->map[coord] = color;
 130                                 else if (amafmap->record_nakade)
 131                                         amaf_op(amafmap->map[coord], +);
 132                                 amafmap->game[amafmap->gamelen].coord = coord;
 133                                 amafmap->game[amafmap->gamelen].color = color;
 134                                 amafmap->gamelen++;
 135                                 assert(amafmap->gamelen < sizeof(amafmap->game) / sizeof(amafmap->game[0]));
 136                         }
 137
 138                         passes = 0;
 139                 }
 140
 141                 if (setup->mercymin && abs(b->captures[S_BLACK] - b->captures[S_WHITE]) > setup->mercymin)
 142                         break;
 143
 144                 color = stone_other(color);
 145         }
 146
 147         floating_t score = board_fast_score(b);
 148         int result = (starting_color == S_WHITE ? score * 2 : - (score * 2));
 149
 150         if (DEBUGL(6)) {
 151                 fprintf(stderr, "Random playout result: %d (W %f)\n", result, score);
 152                 if (DEBUGL(7))
 153                         board_print(b, stderr);
 154         }
 155
 156         if (ownermap)
 157                 board_ownermap_fill(ownermap, b);
 158
 159         if (b->ps)
 160                 free(b->ps);
 161
 162 #ifdef DEBUGL_BY_PLAYOUT
 163         debug_level = debug_level_orig;
 164 #endif
 165
 166         return result;
 167 }