Elo, probdist: Do not needlessly consider board edges
[pachi/peepo.git] / playout / elo.c
blob6bdd37cf8eed20e59cb1a9878267c6e82a2ec41d
1 /* Playout player based on probability distribution generated over
2 * the available moves. */
4 /* We use the ELO-based (Coulom, 2007) approach, where each board
5 * feature (matched pattern, self-atari, capture, MC owner?, ...)
6 * is pre-assigned "playing strength" (gamma).
8 * Then, the problem of choosing a move is basically a team
9 * competition in ELO terms - each spot is represented by a team
10 * of features appearing there; the team gamma is product of feature
11 * gammas. The team gammas make for a probability distribution of
12 * moves to be played.
14 * We use the general pattern classifier that will find the features
15 * for us, and external datasets that can be harvested from a set
16 * of game records (see the HACKING file for details): patterns.spat
17 * as a dictionary of spatial stone configurations, and patterns.gamma
18 * with strengths of particular features. */
20 #include <assert.h>
21 #include <math.h>
22 #include <stdio.h>
23 #include <stdlib.h>
25 //#define DEBUG
26 #include "board.h"
27 #include "debug.h"
28 #include "fixp.h"
29 #include "pattern.h"
30 #include "patternsp.h"
31 #include "playout.h"
32 #include "playout/elo.h"
33 #include "random.h"
34 #include "tactics.h"
35 #include "uct/prior.h"
37 #define PLDEBUGL(n) DEBUGL_(p->debug_level, n)
40 /* Note that the context can be shared by multiple threads! */
42 struct patternset {
43 pattern_spec ps;
44 struct pattern_config pc;
45 struct features_gamma *fg;
48 struct elo_policy {
49 float selfatari;
50 struct patternset choose, assess;
51 playout_elo_callbackp callback; void *callback_data;
55 /* This is the core of the policy - initializes and constructs the
56 * probability distribution over the move candidates. */
58 int
59 elo_get_probdist(struct playout_policy *p, struct patternset *ps, struct board *b, enum stone to_play, struct probdist *pd)
61 //struct elo_policy *pp = p->data;
62 int moves = 0;
64 /* First, assign per-point probabilities. */
66 for (int f = 0; f < b->flen; f++) {
67 struct move m = { .coord = b->f[f], .color = to_play };
69 /* Skip pass (for now)? */
70 if (is_pass(m.coord)) {
71 skip_move:
72 probdist_set(pd, m.coord, 0);
73 continue;
75 //fprintf(stderr, "<%d> %s\n", f, coord2sstr(m.coord, b));
77 /* Skip invalid moves. */
78 if (!board_is_valid_move(b, &m))
79 goto skip_move;
81 /* We shall never fill our own single-point eyes. */
82 /* XXX: In some rare situations, this prunes the best move:
83 * Bulk-five nakade with eye at 1-1 point. */
84 if (board_is_one_point_eye(b, m.coord, to_play)) {
85 goto skip_move;
88 moves++;
89 /* Each valid move starts with gamma 1. */
90 double g = 1.f;
92 /* Some easy features: */
93 /* XXX: We just disable them for now since we call the
94 * pattern matcher; you need the gammas file. */
95 #if 0
96 if (is_bad_selfatari(b, to_play, m.coord))
97 g *= pp->selfatari;
98 #endif
100 /* Match pattern features: */
101 struct pattern p;
102 pattern_match(&ps->pc, ps->ps, &p, b, &m);
103 for (int i = 0; i < p.n; i++) {
104 /* Multiply together gammas of all pattern features. */
105 double gamma = feature_gamma(ps->fg, &p.f[i], NULL);
106 //char buf[256] = ""; feature2str(buf, &p.f[i]);
107 //fprintf(stderr, "<%d> %s feat %s gamma %f\n", f, coord2sstr(m.coord, b), buf, gamma);
108 g *= gamma;
111 probdist_set(pd, m.coord, double_to_fixp(g));
112 //fprintf(stderr, "<%d> %s %f (E %f)\n", f, coord2sstr(m.coord, b), fixp_to_double(probdist_one(pd, m.coord)), pd->items[f]);
115 return moves;
119 struct lprobdist {
120 int n;
121 #define LPD_MAX 8
122 coord_t coords[LPD_MAX];
123 fixp_t items[LPD_MAX];
124 fixp_t total;
126 /* Backups of original totals for restoring. */
127 fixp_t btotal;
128 fixp_t browtotals_v[10];
129 int browtotals_i[10];
130 int browtotals_n;
133 #ifdef BOARD_GAMMA
135 static void
136 elo_check_probdist(struct playout_policy *p, struct board *b, enum stone to_play, struct probdist *pd, int *ignores, struct lprobdist *lpd, coord_t lc)
138 #if 0
139 #define PROBDIST_EPSILON double_to_fixp(0.01)
140 struct elo_policy *pp = p->data;
141 if (pd->total == 0)
142 return;
144 /* Compare to the manually created distribution. */
145 /* XXX: This is now broken if callback is used. */
147 probdist_alloca(pdx, b);
148 elo_get_probdist(p, &pp->choose, b, to_play, &pdx);
149 for (int i = 0; i < b->flen; i++) {
150 coord_t c = b->f[i];
151 if (is_pass(c)) continue;
152 if (c == b->ko.coord) continue;
153 fixp_t val = pd->items[c];
154 if (!is_pass(lc) && coord_is_8adjecent(lc, c, b))
155 for (int j = 0; j < lpd->n; j++)
156 if (lpd->coords[j] == c) {
157 val = lpd->items[j];
158 probdist_mute(&pdx, c);
161 if (abs(pdx.items[c] - val) < PROBDIST_EPSILON)
162 continue;
163 printf("[%s %d] manual %f board %f (base %f) ", coord2sstr(c, b), b->pat3[c], fixp_to_double(pdx.items[c]), fixp_to_double(val), fixp_to_double(pd->items[c]));
164 board_gamma_update(b, c, to_play);
165 printf("plainboard %f\n", fixp_to_double(pd->items[c]));
166 assert(0);
168 for (int r = 0; r < board_size(b); r++) {
169 if (abs(pdx.rowtotals[r] - pd->rowtotals[r]) < PROBDIST_EPSILON)
170 continue;
171 fprintf(stderr, "row %d: manual %f board %f\n", r, fixp_to_double(pdx.rowtotals[r]), fixp_to_double(pd->rowtotals[r]));
172 assert(0);
174 assert(abs(pdx.total - pd->total) < PROBDIST_EPSILON);
175 #undef PROBDIST_EPSILON
176 #endif
179 coord_t
180 playout_elo_choose(struct playout_policy *p, struct board *b, enum stone to_play)
182 struct elo_policy *pp = p->data;
183 /* The base board probdist. */
184 struct probdist *pd = &b->prob[to_play - 1];
185 /* The list of moves we do not consider in pd. */
186 int ignores[10]; int ignores_n = 0;
187 /* The list of local moves; we consider these separately. */
188 struct lprobdist lpd = { .n = 0, .total = 0, .btotal = pd->total, .browtotals_n = 0 };
190 /* The engine might want to adjust our probdist. */
191 if (pp->callback)
192 pp->callback(pp->callback_data, b, to_play, pd);
194 if (PLDEBUGL(5)) {
195 fprintf(stderr, "pd total pre %f lpd %f\n", fixp_to_double(pd->total), fixp_to_double(lpd.total));
198 #define ignore_move(c_) do { \
199 ignores[ignores_n++] = c_; \
200 if (ignores_n > 1 && ignores[ignores_n - 1] < ignores[ignores_n - 2]) { \
201 /* Keep ignores[] sorted. We abuse the fact that we know \
202 * only one item can be out-of-order. */ \
203 coord_t cc = ignores[ignores_n - 2]; \
204 ignores[ignores_n - 2] = ignores[ignores_n - 1]; \
205 ignores[ignores_n - 1] = cc; \
207 int rowi = coord_y(c_, pd->b); \
208 lpd.browtotals_i[lpd.browtotals_n] = rowi; \
209 lpd.browtotals_v[lpd.browtotals_n++] = pd->rowtotals[rowi]; \
210 probdist_mute(pd, c_); \
211 if (PLDEBUGL(6)) \
212 fprintf(stderr, "ignored move %s(%f) => tot pd %f lpd %f\n", coord2sstr(c_, pd->b), fixp_to_double(pd->items[c_]), fixp_to_double(pd->total), fixp_to_double(lpd.total)); \
213 } while (0)
215 /* Make sure ko-prohibited move does not get picked. */
216 if (!is_pass(b->ko.coord)) {
217 assert(b->ko.color == to_play);
218 ignore_move(b->ko.coord);
221 /* Contiguity detection. */
222 if (!is_pass(b->last_move.coord)) {
223 foreach_8neighbor(b, b->last_move.coord) {
224 if (c == b->ko.coord)
225 continue; // already ignored
226 if (board_at(b, c) != S_NONE) {
227 assert(probdist_one(pd, c) == 0);
228 continue;
230 ignore_move(c);
232 fixp_t val = double_to_fixp(fixp_to_double(probdist_one(pd, c)) * b->gamma->gamma[FEAT_CONTIGUITY][1]);
233 lpd.coords[lpd.n] = c;
234 lpd.items[lpd.n++] = val;
235 lpd.total += val;
236 } foreach_8neighbor_end;
239 ignores[ignores_n] = pass;
240 if (PLDEBUGL(5))
241 fprintf(stderr, "pd total post %f lpd %f\n", fixp_to_double(pd->total), fixp_to_double(lpd.total));
243 /* Verify sanity, possibly. */
244 elo_check_probdist(p, b, to_play, pd, ignores, &lpd, b->last_move.coord);
246 /* Pick a move. */
247 coord_t c = pass;
248 fixp_t stab = fast_irandom(lpd.total + pd->total);
249 if (PLDEBUGL(5))
250 fprintf(stderr, "stab %f / (%f + %f)\n", fixp_to_double(stab), fixp_to_double(lpd.total), fixp_to_double(pd->total));
251 if (stab < lpd.total) {
252 /* Local probdist. */
253 if (PLDEBUGL(6)) {
254 /* Some debug prints. */
255 fixp_t tot = 0;
256 for (int i = 0; i < lpd.n; i++) {
257 tot += lpd.items[i];
258 struct pattern p;
259 struct move m = { .color = to_play, .coord = lpd.coords[i] };
260 if (board_at(b, m.coord) != S_NONE) {
261 assert(lpd.items[i] == 0);
262 continue;
264 pattern_match(&pp->choose.pc, pp->choose.ps, &p, b, &m);
265 char s[256] = ""; pattern2str(s, &p);
266 fprintf(stderr, "coord %s <%f> [tot %f] %s (p3:%d)\n",
267 coord2sstr(lpd.coords[i], b), fixp_to_double(lpd.items[i]),
268 fixp_to_double(tot), s,
269 pattern3_by_spatial(pp->choose.pc.spat_dict, b->pat3[lpd.coords[i]]));
272 for (int i = 0; i < lpd.n; i++) {
273 if (stab <= lpd.items[i]) {
274 c = lpd.coords[i];
275 break;
277 stab -= lpd.items[i];
279 if (is_pass(c)) {
280 fprintf(stderr, "elo: local overstab [%f]\n", fixp_to_double(stab));
281 abort();
284 } else if (pd->total > 0) {
285 /* Global probdist. */
286 /* XXX: We re-stab inside. */
287 c = probdist_pick(pd, ignores);
289 } else {
290 if (PLDEBUGL(5))
291 fprintf(stderr, "ding!\n");
292 c = pass;
295 /* Repair the damage. */
296 if (pp->callback) {
297 /* XXX: Do something less horribly inefficient
298 * than just recomputing the whole pd. */
299 pd->total = 0;
300 for (int i = 0; i < board_size(pd->b); i++)
301 pd->rowtotals[i] = 0;
302 for (int i = 0; i < b->flen; i++) {
303 pd->items[b->f[i]] = 0;
304 board_gamma_update(b, b->f[i], to_play);
306 assert(pd->total == lpd.btotal);
308 } else {
309 pd->total = lpd.btotal;
310 /* If we touched a row multiple times (and we sure will),
311 * the latter value is obsolete; but since we go through
312 * the backups in reverse order, all is good. */
313 for (int j = lpd.browtotals_n - 1; j >= 0; j--)
314 pd->rowtotals[lpd.browtotals_i[j]] = lpd.browtotals_v[j];
316 return c;
319 #else
321 coord_t
322 playout_elo_choose(struct playout_policy *p, struct board *b, enum stone to_play)
324 struct elo_policy *pp = p->data;
325 probdist_alloca(pd, b);
326 elo_get_probdist(p, &pp->choose, b, to_play, &pd);
327 if (pp->callback)
328 pp->callback(pp->callback_data, b, to_play, &pd);
329 if (pd.total == 0)
330 return pass;
331 int ignores[1] = { pass };
332 coord_t c = probdist_pick(&pd, ignores);
333 return c;
336 #endif
338 void
339 playout_elo_assess(struct playout_policy *p, struct prior_map *map, int games)
341 struct elo_policy *pp = p->data;
342 probdist_alloca(pd, map->b);
344 int moves;
345 moves = elo_get_probdist(p, &pp->assess, map->b, map->to_play, &pd);
347 /* It is a question how to transform the gamma to won games; we use
348 * a naive approach currently, but not sure how well it works. */
349 /* TODO: Try sqrt(p), atan(p)/pi*2. */
351 for (int f = 0; f < map->b->flen; f++) {
352 coord_t c = map->b->f[f];
353 if (!map->consider[c])
354 continue;
355 add_prior_value(map, c, fixp_to_double(probdist_one(&pd, c)) / fixp_to_double(probdist_total(&pd)), games);
359 void
360 playout_elo_done(struct playout_policy *p)
362 struct elo_policy *pp = p->data;
363 features_gamma_done(pp->choose.fg);
364 features_gamma_done(pp->assess.fg);
368 void
369 playout_elo_callback(struct playout_policy *p, playout_elo_callbackp callback, void *data)
371 struct elo_policy *pp = p->data;
372 pp->callback = callback;
373 pp->callback_data = data;
376 struct playout_policy *
377 playout_elo_init(char *arg, struct board *b)
379 struct playout_policy *p = calloc2(1, sizeof(*p));
380 struct elo_policy *pp = calloc2(1, sizeof(*pp));
381 p->data = pp;
382 p->choose = playout_elo_choose;
383 p->assess = playout_elo_assess;
384 p->done = playout_elo_done;
386 const char *gammafile = features_gamma_filename;
387 /* Some defaults based on the table in Remi Coulom's paper. */
388 pp->selfatari = 0.06;
390 struct pattern_config pc = DEFAULT_PATTERN_CONFIG;
391 int xspat = -1;
392 bool precise_selfatari = false;
394 if (arg) {
395 char *optspec, *next = arg;
396 while (*next) {
397 optspec = next;
398 next += strcspn(next, ":");
399 if (*next) { *next++ = 0; } else { *next = 0; }
401 char *optname = optspec;
402 char *optval = strchr(optspec, '=');
403 if (optval) *optval++ = 0;
405 if (!strcasecmp(optname, "selfatari") && optval) {
406 pp->selfatari = atof(optval);
407 } else if (!strcasecmp(optname, "precisesa")) {
408 /* Use precise self-atari detection within
409 * fast patterns. */
410 precise_selfatari = !optval || atoi(optval);
411 } else if (!strcasecmp(optname, "gammafile") && optval) {
412 /* patterns.gamma by default. We use this,
413 * and need also ${gammafile}f (e.g.
414 * patterns.gammaf) for fast (MC) features. */
415 gammafile = strdup(optval);
416 } else if (!strcasecmp(optname, "xspat") && optval) {
417 /* xspat==0: don't match spatial features
418 * xspat==1: match *only* spatial features */
419 xspat = atoi(optval);
420 } else {
421 fprintf(stderr, "playout-elo: Invalid policy argument %s or missing value\n", optname);
422 exit(1);
427 pc.spat_dict = spatial_dict_init(false);
429 pp->assess.pc = pc;
430 pp->assess.fg = features_gamma_init(&pp->assess.pc, gammafile);
431 memcpy(pp->assess.ps, PATTERN_SPEC_MATCHALL, sizeof(pattern_spec));
432 for (int i = 0; i < FEAT_MAX; i++)
433 if ((xspat == 0 && i == FEAT_SPATIAL) || (xspat == 1 && i != FEAT_SPATIAL))
434 pp->assess.ps[i] = 0;
436 /* In playouts, we need to operate with much smaller set of features
437 * in order to keep reasonable speed. */
438 /* TODO: Configurable. */ /* TODO: Tune. */
439 pp->choose.pc = FAST_PATTERN_CONFIG;
440 pp->choose.pc.spat_dict = pc.spat_dict;
441 char cgammafile[256]; strcpy(stpcpy(cgammafile, gammafile), "f");
442 pp->choose.fg = features_gamma_init(&pp->choose.pc, cgammafile);
443 memcpy(pp->choose.ps, PATTERN_SPEC_MATCHFAST, sizeof(pattern_spec));
444 for (int i = 0; i < FEAT_MAX; i++)
445 if ((xspat == 0 && i == FEAT_SPATIAL) || (xspat == 1 && i != FEAT_SPATIAL))
446 pp->choose.ps[i] = 0;
447 if (precise_selfatari)
448 pp->choose.ps[FEAT_SELFATARI] = ~(1<<PF_SELFATARI_STUPID);
449 board_gamma_set(b, pp->choose.fg, precise_selfatari);
451 return p;