Montecasino: Slightly simplify choose_best_move()
[pachi.git] / montecasino / montecasino.c
blob7a90bdf24f60e21c6558539f792879f0a3b26704
1 #include <math.h>
2 #include <stdio.h>
3 #include <stdlib.h>
4 #include <string.h>
6 #include "board.h"
7 #include "engine.h"
8 #include "move.h"
9 #include "montecarlo/internal.h"
10 #include "montecarlo/hint.h"
11 #include "montecasino/montecasino.h"
12 #include "random.h"
15 /* This is a monte-carlo-based engine with additional per-move heuristics and
16 * some feedback mechanisms. It is based on montecarlo/, with some enhancements
17 * that would make it too convoluted already. It plays MC_GAMES "random" games
18 * from the current board and records win/loss ratio for each first move.
19 * The move with the biggest number of winning games gets played. */
20 /* Note that while the library is based on New Zealand rules, this engine
21 * returns moves according to Chinese rules. Thus, it does not return suicide
22 * moves. It of course respects positional superko too. */
24 /* The arguments accepted are same as montecarlo's. Please see
25 * montecarlo/montecarlo.c for the documentation. */
26 /* Note that YOU MUST PLAY MANY SIMULATIONS for montecasino to work well!
27 * 100000 is about the low sensible bound. */
29 /* How many games must be played for a move in order to trust it. */
30 #define TRUST_THRESHOLD 10
33 /* We reuse large part of the code from the montecarlo/ engine. The
34 * struct montecarlo internal state is part of our internal state; actually,
35 * for now we just use the montecarlo state. */
38 /* FIXME: Cutoff rule for simulations. Currently we are so fast that this
39 * simply does not matter; even 100000 simulations are fast enough to
40 * play 5 minutes S.D. on 19x19 and anything more sounds too ridiculous
41 * already. */
42 /* FIXME: We cannot handle seki. Any good ideas are welcome. A possibility is
43 * to consider 'pass' among the moves, but this seems tricky. */
46 /* 1: m->color wins, 0: m->color loses; -1: no moves left
47 * -2 superko inside the game tree (NOT at root, that's simply invalid move)
48 * -3 first move is multi-stone suicide */
49 static int
50 play_random_game(struct montecarlo *mc, struct board *b, struct move_stat *moves,
51 struct move *m, int i)
53 struct board b2;
54 board_copy(&b2, b);
56 board_play_random(&b2, m->color, &m->coord);
57 if (is_pass(m->coord) || b->superko_violation) {
58 if (mc->debug_level > 3)
59 fprintf(stderr, "\tno moves left\n");
60 board_done_noalloc(&b2);
61 return -1;
63 if (!group_at(&b2, m->coord)) {
64 if (mc->debug_level > 4) {
65 fprintf(stderr, "SUICIDE DETECTED at %d,%d:\n", coord_x(m->coord), coord_y(m->coord));
66 board_print(&b2, stderr);
68 return -3;
71 if (mc->debug_level > 3)
72 fprintf(stderr, "[%d,%d] playing random game\n", coord_x(m->coord), coord_y(m->coord));
74 int gamelen = mc->gamelen - b2.moves;
75 if (gamelen < 10)
76 gamelen = 10;
78 enum stone color = stone_other(m->color);
79 coord_t next_move = pass;
80 coord_t urgent;
82 int passes = 0;
84 /* Special check: We probably tenukied the last opponent's move. But
85 * check if the opponent has lucrative local continuation for her last
86 * move! */
87 /* This check is ultra-important BTW. Without it domain checking does
88 * not bring that much of an advantage. It might even warrant it to by
89 * default do only this domain check. */
90 urgent = pass;
91 domain_hint(mc, b, &urgent, m->color);
92 if (!is_pass(urgent))
93 goto play_urgent;
95 while (gamelen-- && passes < 2) {
96 urgent = pass;
97 domain_hint(mc, &b2, &urgent, m->color);
99 coord_t coord;
101 if (!is_pass(urgent)) {
102 struct move m;
103 play_urgent:
104 m.coord = urgent; m.color = color;
105 if (board_play(&b2, &m) < 0) {
106 if (unlikely(mc->debug_level > 7)) {
107 fprintf(stderr, "Urgent move %d,%d is ILLEGAL:\n", coord_x(urgent), coord_y(urgent));
108 board_print(&b2, stderr);
110 goto play_random;
112 coord = urgent;
113 } else {
114 play_random:
115 board_play_random(&b2, color, &coord);
118 if (is_pass(next_move))
119 next_move = coord;
121 if (unlikely(b2.superko_violation)) {
122 /* We ignore superko violations that are suicides. These
123 * are common only at the end of the game and are
124 * rather harmless. (They will not go through as a root
125 * move anyway.) */
126 if (group_at(&b2, coord)) {
127 if (unlikely(mc->debug_level > 3)) {
128 fprintf(stderr, "Superko fun at %d,%d in\n", coord_x(coord), coord_y(coord));
129 if (mc->debug_level > 4)
130 board_print(&b2, stderr);
132 board_done_noalloc(&b2);
133 return -2;
134 } else {
135 if (unlikely(mc->debug_level > 6)) {
136 fprintf(stderr, "Ignoring superko at %d,%d in\n", coord_x(coord), coord_y(coord));
137 board_print(&b2, stderr);
139 b2.superko_violation = false;
143 if (unlikely(mc->debug_level > 7)) {
144 char *cs = coord2str(coord);
145 fprintf(stderr, "%s %s\n", stone2str(color), cs);
146 free(cs);
149 if (unlikely(is_pass(coord))) {
150 passes++;
151 } else {
152 passes = 0;
155 color = stone_other(color);
158 if (mc->debug_level > 6 - !(i % (mc->games/2)))
159 board_print(&b2, stderr);
161 float score = board_fast_score(&b2);
162 bool result = (m->color == S_WHITE ? (score > 0 ? 1 : 0) : (score < 0 ? 1 : 0));
164 if (mc->debug_level > 3) {
165 fprintf(stderr, "\tresult %d (score %f)\n", result, score);
168 int j = m->coord.pos * b->size2 + next_move.pos;
169 moves[j].games++;
170 if (!result)
171 moves[j].wins++;
173 board_done_noalloc(&b2);
174 return result;
177 /* 1: Games played. 0: No games can be played from this position anymore. */
178 static int
179 play_many_random_games(struct montecarlo *mc, struct board *b, int games, enum stone color,
180 struct move_stat *moves, struct move_stat *second_moves)
182 struct move m;
183 m.color = color;
184 int losses = 0;
185 int i, superko = 0, good_games = 0;
186 for (i = 0; i < mc->games; i++) {
187 int result = play_random_game(mc, b, second_moves, &m, i);
189 if (result == -1)
190 return 0;
191 if (result == -2) {
192 /* Superko. We just ignore this playout.
193 * And play again. */
194 if (unlikely(superko > 2 * mc->games)) {
195 /* Uhh. Triple ko, or something? */
196 if (mc->debug_level > 0)
197 fprintf(stderr, "SUPERKO LOOP. I will pass. Did we hit triple ko?\n");
198 return 0;
200 /* This playout didn't count; we should not
201 * disadvantage moves that lead to a superko.
202 * And it is supposed to be rare. */
203 i--, superko++;
204 continue;
206 if (result == -3) {
207 /* Multi-stone suicide. We play chinese rules,
208 * so we can't consider this. (Note that we
209 * unfortunately still consider this in playouts.) */
210 continue;
213 good_games++;
214 moves[m.coord.pos].games++;
216 if (b->moves < 3) {
217 /* Simple heuristic: avoid opening too low. Do not
218 * play on second or first line as first white or
219 * first two black moves.*/
220 if (coord_x(m.coord) < 3 || coord_x(m.coord) > b->size - 4
221 || coord_y(m.coord) < 3 || coord_y(m.coord) > b->size - 4)
222 continue;
225 losses += 1 - result;
226 moves[m.coord.pos].wins += result;
228 if (unlikely(!losses && i == mc->loss_threshold)) {
229 /* We played out many games and didn't lose once yet.
230 * This game is over. */
231 break;
235 if (!good_games) {
236 /* No more valid moves. */
237 return 0;
240 return 1;
244 struct move_info {
245 coord_t coord;
246 float ratio;
249 static void
250 create_move_queue(struct montecarlo *mc, struct board *b,
251 struct move_stat *moves, struct move_info *q)
253 int qlen = 0;
254 foreach_point(b) {
255 float ratio = (float) moves[c.pos].wins / moves[c.pos].games;
256 if (!isfinite(ratio))
257 continue;
258 struct move_info mi = { c, ratio };
259 if (qlen == 0 || q[qlen - 1].ratio > ratio) {
260 q[qlen++] = mi;
261 } else {
262 int i;
263 for (i = 0; i < qlen; i++) {
264 if (q[i].ratio >= ratio)
265 continue;
266 memmove(&q[i + 1], &q[i], sizeof(*q) * (qlen++ - i));
267 q[i] = mi;
268 break;
271 } foreach_point_end;
274 static float
275 best_move_at_board(struct montecarlo *mc, struct board *b, struct move_stat *moves)
277 float top_ratio = 0;
278 foreach_point(b) {
279 if (moves[c.pos].games < TRUST_THRESHOLD)
280 continue;
281 float ratio = (float) moves[c.pos].wins / moves[c.pos].games;
282 if (ratio > top_ratio)
283 top_ratio = ratio;
284 } foreach_point_end;
285 return top_ratio;
288 static void
289 choose_best_move(struct montecarlo *mc, struct board *b,
290 struct move_stat *moves, struct move_stat *second_moves, struct move_stat *first_moves,
291 float *top_ratio, coord_t *top_coord)
293 struct move_info sorted_moves[b->size2];
294 memset(sorted_moves, 0, b->size2 * sizeof(*sorted_moves));
295 create_move_queue(mc, b, moves, sorted_moves);
296 /* Now, moves sorted descending by ratio are in sorted_moves. */
298 /* We take the moves with ratio better than 0.55 (arbitrary value),
299 * but at least best ten (arbitrary value). From those, we choose
300 * the one where opponent's best counterattack has worst chance
301 * of working. */
303 /* Before, we just tried to take _any_ move with the opponent's worst
304 * counterattack, but that didn't work very well in the practice;
305 * there have to be way too many game playouts to have reliable
306 * second_moves[], apparently. */
308 int move = 0;
309 while (move < 10 || (move < b->size2 && sorted_moves[move].ratio > 0.55)) {
310 coord_t c = sorted_moves[move].coord;
311 move++;
312 if (!moves[c.pos].wins) { /* whatever */
313 continue;
316 float ratio = 1 - best_move_at_board(mc, b, &second_moves[c.pos * b->size2]);
317 if (ratio > *top_ratio) {
318 *top_ratio = ratio;
319 *top_coord = c;
321 /* Evil cheat. */
322 first_moves[c.pos].games = 100; first_moves[c.pos].wins = ratio * 100;
323 if (mc->debug_level > 2) {
324 fprintf(stderr, "Winner candidate [%d,%d] has counter ratio %f\n", coord_x(c), coord_y(c), ratio);
325 if (mc->debug_level > 3)
326 board_stats_print(b, &second_moves[c.pos * b->size2], stderr);
332 static coord_t *
333 montecasino_genmove(struct engine *e, struct board *b, enum stone color)
335 struct montecarlo *mc = e->data;
337 /* resign when the hope for win vanishes */
338 coord_t top_coord = resign;
339 float top_ratio = mc->resign_ratio;
341 struct move_stat moves[b->size2];
342 struct move_stat second_moves[b->size2][b->size2];
343 /* Then first moves again, final decision; only for debugging */
344 struct move_stat first_moves[b->size2];
345 memset(moves, 0, sizeof(moves));
346 memset(second_moves, 0, sizeof(second_moves));
347 memset(first_moves, 0, sizeof(first_moves));
349 if (!play_many_random_games(mc, b, mc->games, color, moves, (struct move_stat *) second_moves)) {
350 /* No more moves. */
351 top_coord = pass; top_ratio = 0.5;
352 goto move_found;
355 /* We take the best moves and choose the one with least lucrative
356 * opponent's counterattack. */
357 choose_best_move(mc, b, moves, (struct move_stat *) second_moves, first_moves, &top_ratio, &top_coord);
359 if (mc->debug_level > 2) {
360 fprintf(stderr, "Our board stats:\n");
361 board_stats_print(b, moves, stderr);
362 fprintf(stderr, "Opponents' counters stats:\n");
363 board_stats_print(b, first_moves, stderr);
364 if (!is_resign(top_coord)) {
365 fprintf(stderr, "Opponent's reaction stats:\n");
366 board_stats_print(b, second_moves[top_coord.pos], stderr);
370 move_found:
371 if (mc->debug_level > 1)
372 fprintf(stderr, "*** WINNER is %d,%d with score %1.4f\n", coord_x(top_coord), coord_y(top_coord), top_ratio);
374 return coord_copy(top_coord);
377 struct engine *
378 engine_montecasino_init(char *arg)
380 struct montecarlo *mc = montecarlo_state_init(arg);
381 struct engine *e = calloc(1, sizeof(struct engine));
382 e->name = "MonteCasino Engine";
383 e->comment = "I'm playing in Monte Casino now! When we both pass, I will consider all the stones on the board alive. If you are reading this, write 'yes'. Please bear with me at the game end, I need to fill the whole board; if you help me, we will both be happier. Filling the board will not lose points (NZ rules).";
384 e->genmove = montecasino_genmove;
385 e->data = mc;
387 return e;