9 #include "montecarlo/internal.h"
10 #include "montecarlo/hint.h"
11 #include "montecasino/montecasino.h"
15 /* This is a monte-carlo-based engine with additional per-move heuristics and
16 * some feedback mechanisms. It is based on montecarlo/, with some enhancements
17 * that would make it too convoluted already. It plays MC_GAMES "random" games
18 * from the current board and records win/loss ratio for each first move.
19 * The move with the biggest number of winning games gets played. */
20 /* Note that while the library is based on New Zealand rules, this engine
21 * returns moves according to Chinese rules. Thus, it does not return suicide
22 * moves. It of course respects positional superko too. */
24 /* The arguments accepted are same as montecarlo's. Please see
25 * montecarlo/montecarlo.c for the documentation. */
26 /* Note that YOU MUST PLAY MANY SIMULATIONS for montecasino to work well!
27 * 100000 is about the low sensible bound. */
29 /* How many games must be played for a move in order to trust it. */
30 #define TRUST_THRESHOLD 10
33 /* We reuse large part of the code from the montecarlo/ engine. The
34 * struct montecarlo internal state is part of our internal state; actually,
35 * for now we just use the montecarlo state. */
38 /* FIXME: Cutoff rule for simulations. Currently we are so fast that this
39 * simply does not matter; even 100000 simulations are fast enough to
40 * play 5 minutes S.D. on 19x19 and anything more sounds too ridiculous
42 /* FIXME: We cannot handle seki. Any good ideas are welcome. A possibility is
43 * to consider 'pass' among the moves, but this seems tricky. */
46 /* 1: m->color wins, 0: m->color loses; -1: no moves left
47 * -2 superko inside the game tree (NOT at root, that's simply invalid move)
48 * -3 first move is multi-stone suicide */
50 play_random_game(struct montecarlo
*mc
, struct board
*b
, struct move_stat
*moves
,
51 struct move
*m
, int i
)
56 board_play_random(&b2
, m
->color
, &m
->coord
);
57 if (is_pass(m
->coord
) || b
->superko_violation
) {
58 if (mc
->debug_level
> 3)
59 fprintf(stderr
, "\tno moves left\n");
60 board_done_noalloc(&b2
);
63 if (!group_at(&b2
, m
->coord
)) {
64 if (mc
->debug_level
> 4) {
65 fprintf(stderr
, "SUICIDE DETECTED at %d,%d:\n", coord_x(m
->coord
), coord_y(m
->coord
));
66 board_print(&b2
, stderr
);
71 if (mc
->debug_level
> 3)
72 fprintf(stderr
, "[%d,%d] playing random game\n", coord_x(m
->coord
), coord_y(m
->coord
));
74 int gamelen
= mc
->gamelen
- b2
.moves
;
78 enum stone color
= stone_other(m
->color
);
79 coord_t next_move
= pass
;
84 /* Special check: We probably tenukied the last opponent's move. But
85 * check if the opponent has lucrative local continuation for her last
87 /* This check is ultra-important BTW. Without it domain checking does
88 * not bring that much of an advantage. It might even warrant it to by
89 * default do only this domain check. */
91 domain_hint(mc
, b
, &urgent
, m
->color
);
95 while (gamelen
-- && passes
< 2) {
97 domain_hint(mc
, &b2
, &urgent
, m
->color
);
101 if (!is_pass(urgent
)) {
104 m
.coord
= urgent
; m
.color
= color
;
105 if (board_play(&b2
, &m
) < 0) {
106 if (unlikely(mc
->debug_level
> 7)) {
107 fprintf(stderr
, "Urgent move %d,%d is ILLEGAL:\n", coord_x(urgent
), coord_y(urgent
));
108 board_print(&b2
, stderr
);
115 board_play_random(&b2
, color
, &coord
);
118 if (is_pass(next_move
))
121 if (unlikely(b2
.superko_violation
)) {
122 /* We ignore superko violations that are suicides. These
123 * are common only at the end of the game and are
124 * rather harmless. (They will not go through as a root
126 if (group_at(&b2
, coord
)) {
127 if (unlikely(mc
->debug_level
> 3)) {
128 fprintf(stderr
, "Superko fun at %d,%d in\n", coord_x(coord
), coord_y(coord
));
129 if (mc
->debug_level
> 4)
130 board_print(&b2
, stderr
);
132 board_done_noalloc(&b2
);
135 if (unlikely(mc
->debug_level
> 6)) {
136 fprintf(stderr
, "Ignoring superko at %d,%d in\n", coord_x(coord
), coord_y(coord
));
137 board_print(&b2
, stderr
);
139 b2
.superko_violation
= false;
143 if (unlikely(mc
->debug_level
> 7)) {
144 char *cs
= coord2str(coord
);
145 fprintf(stderr
, "%s %s\n", stone2str(color
), cs
);
149 if (unlikely(is_pass(coord
))) {
155 color
= stone_other(color
);
158 if (mc
->debug_level
> 6 - !(i
% (mc
->games
/2)))
159 board_print(&b2
, stderr
);
161 float score
= board_fast_score(&b2
);
162 bool result
= (m
->color
== S_WHITE
? (score
> 0 ? 1 : 0) : (score
< 0 ? 1 : 0));
164 if (mc
->debug_level
> 3) {
165 fprintf(stderr
, "\tresult %d (score %f)\n", result
, score
);
168 int j
= m
->coord
.pos
* b
->size2
+ next_move
.pos
;
173 board_done_noalloc(&b2
);
177 /* 1: Games played. 0: No games can be played from this position anymore. */
179 play_many_random_games(struct montecarlo
*mc
, struct board
*b
, int games
, enum stone color
,
180 struct move_stat
*moves
, struct move_stat
*second_moves
)
185 int i
, superko
= 0, good_games
= 0;
186 for (i
= 0; i
< mc
->games
; i
++) {
187 int result
= play_random_game(mc
, b
, second_moves
, &m
, i
);
192 /* Superko. We just ignore this playout.
194 if (unlikely(superko
> 2 * mc
->games
)) {
195 /* Uhh. Triple ko, or something? */
196 if (mc
->debug_level
> 0)
197 fprintf(stderr
, "SUPERKO LOOP. I will pass. Did we hit triple ko?\n");
200 /* This playout didn't count; we should not
201 * disadvantage moves that lead to a superko.
202 * And it is supposed to be rare. */
207 /* Multi-stone suicide. We play chinese rules,
208 * so we can't consider this. (Note that we
209 * unfortunately still consider this in playouts.) */
214 moves
[m
.coord
.pos
].games
++;
217 /* Simple heuristic: avoid opening too low. Do not
218 * play on second or first line as first white or
219 * first two black moves.*/
220 if (coord_x(m
.coord
) < 3 || coord_x(m
.coord
) > b
->size
- 4
221 || coord_y(m
.coord
) < 3 || coord_y(m
.coord
) > b
->size
- 4)
225 losses
+= 1 - result
;
226 moves
[m
.coord
.pos
].wins
+= result
;
228 if (unlikely(!losses
&& i
== mc
->loss_threshold
)) {
229 /* We played out many games and didn't lose once yet.
230 * This game is over. */
236 /* No more valid moves. */
250 create_move_queue(struct montecarlo
*mc
, struct board
*b
,
251 struct move_stat
*moves
, struct move_info
*q
)
255 float ratio
= (float) moves
[c
.pos
].wins
/ moves
[c
.pos
].games
;
256 if (!isfinite(ratio
))
258 struct move_info mi
= { c
, ratio
};
259 if (qlen
== 0 || q
[qlen
- 1].ratio
> ratio
) {
263 for (i
= 0; i
< qlen
; i
++) {
264 if (q
[i
].ratio
>= ratio
)
266 memmove(&q
[i
+ 1], &q
[i
], sizeof(*q
) * (qlen
++ - i
));
275 best_move_at_board(struct montecarlo
*mc
, struct board
*b
, struct move_stat
*moves
)
279 if (moves
[c
.pos
].games
< TRUST_THRESHOLD
)
281 float ratio
= (float) moves
[c
.pos
].wins
/ moves
[c
.pos
].games
;
282 if (ratio
> top_ratio
)
289 choose_best_move(struct montecarlo
*mc
, struct board
*b
,
290 struct move_stat
*moves
, struct move_stat
*second_moves
, struct move_stat
*first_moves
,
291 float *top_ratio
, coord_t
*top_coord
)
293 struct move_info sorted_moves
[b
->size2
];
294 memset(sorted_moves
, 0, b
->size2
* sizeof(*sorted_moves
));
295 create_move_queue(mc
, b
, moves
, sorted_moves
);
296 /* Now, moves sorted descending by ratio are in sorted_moves. */
298 /* We take the moves with ratio better than 0.55 (arbitrary value),
299 * but at least best ten (arbitrary value). From those, we choose
300 * the one where opponent's best counterattack has worst chance
303 /* Before, we just tried to take _any_ move with the opponent's worst
304 * counterattack, but that didn't work very well in the practice;
305 * there have to be way too many game playouts to have reliable
306 * second_moves[], apparently. */
309 while (move
< 10 || (move
< b
->size2
&& sorted_moves
[move
].ratio
> 0.55)) {
310 coord_t c
= sorted_moves
[move
].coord
;
312 if (!moves
[c
.pos
].wins
) { /* whatever */
316 float ratio
= 1 - best_move_at_board(mc
, b
, &second_moves
[c
.pos
* b
->size2
]);
317 if (ratio
> *top_ratio
) {
322 first_moves
[c
.pos
].games
= 100; first_moves
[c
.pos
].wins
= ratio
* 100;
323 if (mc
->debug_level
> 2) {
324 fprintf(stderr
, "Winner candidate [%d,%d] has counter ratio %f\n", coord_x(c
), coord_y(c
), ratio
);
325 if (mc
->debug_level
> 3)
326 board_stats_print(b
, &second_moves
[c
.pos
* b
->size2
], stderr
);
333 montecasino_genmove(struct engine
*e
, struct board
*b
, enum stone color
)
335 struct montecarlo
*mc
= e
->data
;
337 /* resign when the hope for win vanishes */
338 coord_t top_coord
= resign
;
339 float top_ratio
= mc
->resign_ratio
;
341 struct move_stat moves
[b
->size2
];
342 struct move_stat second_moves
[b
->size2
][b
->size2
];
343 /* Then first moves again, final decision; only for debugging */
344 struct move_stat first_moves
[b
->size2
];
345 memset(moves
, 0, sizeof(moves
));
346 memset(second_moves
, 0, sizeof(second_moves
));
347 memset(first_moves
, 0, sizeof(first_moves
));
349 if (!play_many_random_games(mc
, b
, mc
->games
, color
, moves
, (struct move_stat
*) second_moves
)) {
351 top_coord
= pass
; top_ratio
= 0.5;
355 /* We take the best moves and choose the one with least lucrative
356 * opponent's counterattack. */
357 choose_best_move(mc
, b
, moves
, (struct move_stat
*) second_moves
, first_moves
, &top_ratio
, &top_coord
);
359 if (mc
->debug_level
> 2) {
360 fprintf(stderr
, "Our board stats:\n");
361 board_stats_print(b
, moves
, stderr
);
362 fprintf(stderr
, "Opponents' counters stats:\n");
363 board_stats_print(b
, first_moves
, stderr
);
364 if (!is_resign(top_coord
)) {
365 fprintf(stderr
, "Opponent's reaction stats:\n");
366 board_stats_print(b
, second_moves
[top_coord
.pos
], stderr
);
371 if (mc
->debug_level
> 1)
372 fprintf(stderr
, "*** WINNER is %d,%d with score %1.4f\n", coord_x(top_coord
), coord_y(top_coord
), top_ratio
);
374 return coord_copy(top_coord
);
378 engine_montecasino_init(char *arg
)
380 struct montecarlo
*mc
= montecarlo_state_init(arg
);
381 struct engine
*e
= calloc(1, sizeof(struct engine
));
382 e
->name
= "MonteCasino Engine";
383 e
->comment
= "I'm playing in Monte Casino now! When we both pass, I will consider all the stones on the board alive. If you are reading this, write 'yes'. Please bear with me at the game end, I need to fill the whole board; if you help me, we will both be happier. Filling the board will not lose points (NZ rules).";
384 e
->genmove
= montecasino_genmove
;