Merge branch 'master' into libmap
[pachi.git] / playout.c
blob01b17de7e875e20b93b10eaa204e278d1dd7ce33
1 #define DEBUG
2 #include <assert.h>
3 #include <math.h>
4 #include <stdio.h>
5 #include <stdlib.h>
6 #include <string.h>
8 #include "board.h"
9 #include "debug.h"
10 #include "engine.h"
11 #include "move.h"
12 #include "ownermap.h"
13 #include "playout.h"
14 #include "tactics/goals.h"
16 /* Whether to set global debug level to the same as the playout
17 * has, in case it is different. This can make sure e.g. tactical
18 * reading produces proper level of debug prints during simulations.
19 * But it is safe to enable this only in single-threaded instances! */
20 //#define DEBUGL_BY_PLAYOUT
22 #define PLDEBUGL(n) DEBUGL_(policy->debug_level, n)
25 coord_t
26 play_random_move(struct playout_setup *setup,
27 struct board *b, enum stone color,
28 struct playout_policy *policy)
30 coord_t coord = pass;
32 if (setup->prepolicy_hook) {
33 coord = setup->prepolicy_hook(policy, setup, b, color);
34 // fprintf(stderr, "prehook: %s\n", coord2sstr(coord, b));
37 if (is_pass(coord)) {
38 coord = policy->choose(policy, setup, b, color);
39 // fprintf(stderr, "policy: %s\n", coord2sstr(coord, b));
42 if (is_pass(coord) && setup->postpolicy_hook) {
43 coord = setup->postpolicy_hook(policy, setup, b, color);
44 // fprintf(stderr, "posthook: %s\n", coord2sstr(coord, b));
47 if (is_pass(coord)) {
48 play_random:
49 /* Defer to uniformly random move choice. */
50 /* This must never happen if the policy is tracking
51 * internal board state, obviously. */
52 assert(!policy->setboard);
53 board_play_random(b, color, &coord, (ppr_permit) policy->permit, policy);
55 } else {
56 struct move m;
57 m.coord = coord; m.color = color;
58 if (board_play(b, &m) < 0) {
59 if (PLDEBUGL(4)) {
60 fprintf(stderr, "Pre-picked move %d,%d is ILLEGAL:\n",
61 coord_x(coord, b), coord_y(coord, b));
62 board_print(b, stderr);
64 goto play_random;
68 return coord;
71 int
72 play_random_game(struct playout_setup *setup,
73 struct board *b, enum stone starting_color,
74 struct playout_amafmap *amafmap,
75 struct board_ownermap *ownermap,
76 struct playout_policy *policy)
78 assert(setup && policy);
80 int gamelen = setup->gamelen - b->moves;
82 struct libmap_mq lmqueue = {{0}};
83 if (b->libmap) {
84 b->lmqueue = &lmqueue;
85 b->libmap_init_groups = false;
88 if (policy->setboard)
89 policy->setboard(policy, b);
90 #ifdef DEBUGL_BY_PLAYOUT
91 int debug_level_orig = debug_level;
92 debug_level = policy->debug_level;
93 #endif
95 enum stone color = starting_color;
97 int passes = is_pass(b->last_move.coord) && b->moves > 0;
99 while (gamelen-- && passes < 2) {
100 coord_t coord = play_random_move(setup, b, color, policy);
102 #if 0
103 /* For UCT, superko test here is downright harmful since
104 * in superko-likely situation we throw away literally
105 * 95% of our playouts; UCT will deal with this fine by
106 * itself. */
107 if (unlikely(b->superko_violation)) {
108 /* We ignore superko violations that are suicides. These
109 * are common only at the end of the game and are
110 * rather harmless. (They will not go through as a root
111 * move anyway.) */
112 if (group_at(b, coord)) {
113 if (DEBUGL(3)) {
114 fprintf(stderr, "Superko fun at %d,%d in\n", coord_x(coord, b), coord_y(coord, b));
115 if (DEBUGL(4))
116 board_print(b, stderr);
118 return 0;
119 } else {
120 if (DEBUGL(6)) {
121 fprintf(stderr, "Ignoring superko at %d,%d in\n", coord_x(coord, b), coord_y(coord, b));
122 board_print(b, stderr);
124 b->superko_violation = false;
127 #endif
129 if (PLDEBUGL(7)) {
130 fprintf(stderr, "%s %s\n", stone2str(color), coord2sstr(coord, b));
131 if (PLDEBUGL(8))
132 board_print(b, stderr);
135 if (unlikely(is_pass(coord))) {
136 passes++;
137 } else {
138 passes = 0;
140 if (amafmap) {
141 assert(amafmap->gamelen < MAX_GAMELEN);
142 amafmap->is_ko_capture[amafmap->gamelen] = board_playing_ko_threat(b);
143 amafmap->game[amafmap->gamelen++] = coord;
146 if (setup->mercymin && abs(b->captures[S_BLACK] - b->captures[S_WHITE]) > setup->mercymin)
147 break;
149 color = stone_other(color);
152 floating_t score = board_fast_score(b);
153 int result = (starting_color == S_WHITE ? score * 2 : - (score * 2));
155 if (DEBUGL(6)) {
156 fprintf(stderr, "Random playout result: %d (W %f)\n", result, score);
157 if (DEBUGL(7))
158 board_print(b, stderr);
161 if (ownermap)
162 board_ownermap_fill(ownermap, b);
163 if (b->libmap) {
164 libmap_queue_process(b, score > 0 ? S_WHITE : S_BLACK);
165 b->lmqueue = NULL;
168 if (b->ps)
169 free(b->ps);
171 #ifdef DEBUGL_BY_PLAYOUT
172 debug_level = debug_level_orig;
173 #endif
175 return result;