Elo: Add EAT_ATAN support
[pachi/derm.git] / playout / elo.c
blobe335c18aea0ce77803f5283e82702472f8768659
1 /* Playout player based on probability distribution generated over
2 * the available moves. */
4 /* We use the ELO-based (Coulom, 2007) approach, where each board
5 * feature (matched pattern, self-atari, capture, MC owner?, ...)
6 * is pre-assigned "playing strength" (gamma).
8 * Then, the problem of choosing a move is basically a team
9 * competition in ELO terms - each spot is represented by a team
10 * of features appearing there; the team gamma is product of feature
11 * gammas. The team gammas make for a probability distribution of
12 * moves to be played.
14 * We use the general pattern classifier that will find the features
15 * for us, and external datasets that can be harvested from a set
16 * of game records (see the HACKING file for details): patterns.spat
17 * as a dictionary of spatial stone configurations, and patterns.gamma
18 * with strengths of particular features. */
20 #include <assert.h>
21 #include <math.h>
22 #include <stdio.h>
23 #include <stdlib.h>
25 //#define DEBUG
26 #include "board.h"
27 #include "debug.h"
28 #include "fixp.h"
29 #include "pattern.h"
30 #include "patternsp.h"
31 #include "playout.h"
32 #include "playout/elo.h"
33 #include "random.h"
34 #include "tactics.h"
35 #include "uct/prior.h"
37 #define PLDEBUGL(n) DEBUGL_(p->debug_level, n)
40 /* Note that the context can be shared by multiple threads! */
42 struct patternset {
43 pattern_spec ps;
44 struct pattern_config pc;
45 struct features_gamma *fg;
48 struct elo_policy {
49 float selfatari;
50 struct patternset choose, assess;
51 playout_elo_callbackp callback; void *callback_data;
53 enum {
54 EAV_TOTAL,
55 EAV_BEST,
56 } assess_eval;
57 enum {
58 EAT_LINEAR,
59 EAT_ATAN,
60 } assess_transform;
64 /* This is the core of the policy - initializes and constructs the
65 * probability distribution over the move candidates. */
67 int
68 elo_get_probdist(struct playout_policy *p, struct patternset *ps, struct board *b, enum stone to_play, struct probdist *pd)
70 //struct elo_policy *pp = p->data;
71 int moves = 0;
73 /* First, assign per-point probabilities. */
75 for (int f = 0; f < b->flen; f++) {
76 struct move m = { .coord = b->f[f], .color = to_play };
78 /* Skip pass (for now)? */
79 if (is_pass(m.coord)) {
80 skip_move:
81 probdist_set(pd, m.coord, 0);
82 continue;
84 if (PLDEBUGL(7))
85 fprintf(stderr, "<%d> %s\n", f, coord2sstr(m.coord, b));
87 /* Skip invalid moves. */
88 if (!board_is_valid_move(b, &m))
89 goto skip_move;
91 /* We shall never fill our own single-point eyes. */
92 /* XXX: In some rare situations, this prunes the best move:
93 * Bulk-five nakade with eye at 1-1 point. */
94 if (board_is_one_point_eye(b, m.coord, to_play)) {
95 goto skip_move;
98 moves++;
99 /* Each valid move starts with gamma 1. */
100 double g = 1.f;
102 /* Some easy features: */
103 /* XXX: We just disable them for now since we call the
104 * pattern matcher; you need the gammas file. */
105 #if 0
106 if (is_bad_selfatari(b, to_play, m.coord))
107 g *= pp->selfatari;
108 #endif
110 /* Match pattern features: */
111 struct pattern pat;
112 pattern_match(&ps->pc, ps->ps, &pat, b, &m);
113 for (int i = 0; i < pat.n; i++) {
114 /* Multiply together gammas of all pattern features. */
115 double gamma = feature_gamma(ps->fg, &pat.f[i], NULL);
116 if (PLDEBUGL(7)) {
117 char buf[256] = ""; feature2str(buf, &pat.f[i]);
118 fprintf(stderr, "<%d> %s feat %s gamma %f\n", f, coord2sstr(m.coord, b), buf, gamma);
120 g *= gamma;
123 probdist_set(pd, m.coord, double_to_fixp(g));
124 if (PLDEBUGL(7))
125 fprintf(stderr, "<%d> %s %f (E %f)\n", f, coord2sstr(m.coord, b), fixp_to_double(probdist_one(pd, m.coord)), g);
128 return moves;
132 struct lprobdist {
133 int n;
134 #define LPD_MAX 8
135 coord_t coords[LPD_MAX];
136 fixp_t items[LPD_MAX];
137 fixp_t total;
139 /* Backups of original totals for restoring. */
140 fixp_t btotal;
141 fixp_t browtotals_v[10];
142 int browtotals_i[10];
143 int browtotals_n;
146 #ifdef BOARD_GAMMA
148 static void
149 elo_check_probdist(struct playout_policy *p, struct board *b, enum stone to_play, struct probdist *pd, int *ignores, struct lprobdist *lpd, coord_t lc)
151 #if 0
152 #define PROBDIST_EPSILON double_to_fixp(0.01)
153 struct elo_policy *pp = p->data;
154 if (pd->total == 0)
155 return;
157 /* Compare to the manually created distribution. */
158 /* XXX: This is now broken if callback is used. */
160 probdist_alloca(pdx, b);
161 elo_get_probdist(p, &pp->choose, b, to_play, &pdx);
162 for (int i = 0; i < b->flen; i++) {
163 coord_t c = b->f[i];
164 if (is_pass(c)) continue;
165 if (c == b->ko.coord) continue;
166 fixp_t val = pd->items[c];
167 if (!is_pass(lc) && coord_is_8adjecent(lc, c, b))
168 for (int j = 0; j < lpd->n; j++)
169 if (lpd->coords[j] == c) {
170 val = lpd->items[j];
171 probdist_mute(&pdx, c);
174 if (abs(pdx.items[c] - val) < PROBDIST_EPSILON)
175 continue;
176 printf("[%s %d] manual %f board %f (base %f) ", coord2sstr(c, b), b->pat3[c], fixp_to_double(pdx.items[c]), fixp_to_double(val), fixp_to_double(pd->items[c]));
177 board_gamma_update(b, c, to_play);
178 printf("plainboard %f\n", fixp_to_double(pd->items[c]));
179 assert(0);
181 for (int r = 0; r < board_size(b); r++) {
182 if (abs(pdx.rowtotals[r] - pd->rowtotals[r]) < PROBDIST_EPSILON)
183 continue;
184 fprintf(stderr, "row %d: manual %f board %f\n", r, fixp_to_double(pdx.rowtotals[r]), fixp_to_double(pd->rowtotals[r]));
185 assert(0);
187 assert(abs(pdx.total - pd->total) < PROBDIST_EPSILON);
188 #undef PROBDIST_EPSILON
189 #endif
192 coord_t
193 playout_elo_choose(struct playout_policy *p, struct board *b, enum stone to_play)
195 struct elo_policy *pp = p->data;
196 /* The base board probdist. */
197 struct probdist *pd = &b->prob[to_play - 1];
198 /* The list of moves we do not consider in pd. */
199 int ignores[10]; int ignores_n = 0;
200 /* The list of local moves; we consider these separately. */
201 struct lprobdist lpd = { .n = 0, .total = 0, .btotal = pd->total, .browtotals_n = 0 };
203 /* The engine might want to adjust our probdist. */
204 if (pp->callback)
205 pp->callback(pp->callback_data, b, to_play, pd);
207 if (PLDEBUGL(5)) {
208 fprintf(stderr, "pd total pre %f lpd %f\n", fixp_to_double(pd->total), fixp_to_double(lpd.total));
211 #define ignore_move(c_) do { \
212 ignores[ignores_n++] = c_; \
213 if (ignores_n > 1 && ignores[ignores_n - 1] < ignores[ignores_n - 2]) { \
214 /* Keep ignores[] sorted. We abuse the fact that we know \
215 * only one item can be out-of-order. */ \
216 coord_t cc = ignores[ignores_n - 2]; \
217 ignores[ignores_n - 2] = ignores[ignores_n - 1]; \
218 ignores[ignores_n - 1] = cc; \
220 int rowi = coord_y(c_, pd->b); \
221 lpd.browtotals_i[lpd.browtotals_n] = rowi; \
222 lpd.browtotals_v[lpd.browtotals_n++] = pd->rowtotals[rowi]; \
223 probdist_mute(pd, c_); \
224 if (PLDEBUGL(6)) \
225 fprintf(stderr, "ignored move %s(%f) => tot pd %f lpd %f\n", coord2sstr(c_, pd->b), fixp_to_double(pd->items[c_]), fixp_to_double(pd->total), fixp_to_double(lpd.total)); \
226 } while (0)
228 /* Make sure ko-prohibited move does not get picked. */
229 if (!is_pass(b->ko.coord)) {
230 assert(b->ko.color == to_play);
231 ignore_move(b->ko.coord);
234 /* Contiguity detection. */
235 if (!is_pass(b->last_move.coord)) {
236 foreach_8neighbor(b, b->last_move.coord) {
237 if (c == b->ko.coord)
238 continue; // already ignored
239 if (board_at(b, c) != S_NONE) {
240 assert(probdist_one(pd, c) == 0);
241 continue;
243 ignore_move(c);
245 fixp_t val = double_to_fixp(fixp_to_double(probdist_one(pd, c)) * b->gamma->gamma[FEAT_CONTIGUITY][1]);
246 lpd.coords[lpd.n] = c;
247 lpd.items[lpd.n++] = val;
248 lpd.total += val;
249 } foreach_8neighbor_end;
252 ignores[ignores_n] = pass;
253 if (PLDEBUGL(5))
254 fprintf(stderr, "pd total post %f lpd %f\n", fixp_to_double(pd->total), fixp_to_double(lpd.total));
256 /* Verify sanity, possibly. */
257 elo_check_probdist(p, b, to_play, pd, ignores, &lpd, b->last_move.coord);
259 /* Pick a move. */
260 coord_t c = pass;
261 fixp_t stab = fast_irandom(lpd.total + pd->total);
262 if (PLDEBUGL(5))
263 fprintf(stderr, "stab %f / (%f + %f)\n", fixp_to_double(stab), fixp_to_double(lpd.total), fixp_to_double(pd->total));
264 if (stab < lpd.total) {
265 /* Local probdist. */
266 if (PLDEBUGL(6)) {
267 /* Some debug prints. */
268 fixp_t tot = 0;
269 for (int i = 0; i < lpd.n; i++) {
270 tot += lpd.items[i];
271 struct pattern p;
272 struct move m = { .color = to_play, .coord = lpd.coords[i] };
273 if (board_at(b, m.coord) != S_NONE) {
274 assert(lpd.items[i] == 0);
275 continue;
277 pattern_match(&pp->choose.pc, pp->choose.ps, &p, b, &m);
278 char s[256] = ""; pattern2str(s, &p);
279 fprintf(stderr, "coord %s <%f> [tot %f] %s (p3:%d)\n",
280 coord2sstr(lpd.coords[i], b), fixp_to_double(lpd.items[i]),
281 fixp_to_double(tot), s,
282 pattern3_by_spatial(pp->choose.pc.spat_dict, b->pat3[lpd.coords[i]]));
285 for (int i = 0; i < lpd.n; i++) {
286 if (stab <= lpd.items[i]) {
287 c = lpd.coords[i];
288 break;
290 stab -= lpd.items[i];
292 if (is_pass(c)) {
293 fprintf(stderr, "elo: local overstab [%f]\n", fixp_to_double(stab));
294 abort();
297 } else if (pd->total > 0) {
298 /* Global probdist. */
299 /* XXX: We re-stab inside. */
300 c = probdist_pick(pd, ignores);
302 } else {
303 if (PLDEBUGL(5))
304 fprintf(stderr, "ding!\n");
305 c = pass;
308 /* Repair the damage. */
309 if (pp->callback) {
310 /* XXX: Do something less horribly inefficient
311 * than just recomputing the whole pd. */
312 pd->total = 0;
313 for (int i = 0; i < board_size(pd->b); i++)
314 pd->rowtotals[i] = 0;
315 for (int i = 0; i < b->flen; i++) {
316 pd->items[b->f[i]] = 0;
317 board_gamma_update(b, b->f[i], to_play);
319 assert(pd->total == lpd.btotal);
321 } else {
322 pd->total = lpd.btotal;
323 /* If we touched a row multiple times (and we sure will),
324 * the latter value is obsolete; but since we go through
325 * the backups in reverse order, all is good. */
326 for (int j = lpd.browtotals_n - 1; j >= 0; j--)
327 pd->rowtotals[lpd.browtotals_i[j]] = lpd.browtotals_v[j];
329 return c;
332 #else
334 coord_t
335 playout_elo_choose(struct playout_policy *p, struct board *b, enum stone to_play)
337 struct elo_policy *pp = p->data;
338 probdist_alloca(pd, b);
339 elo_get_probdist(p, &pp->choose, b, to_play, &pd);
340 if (pp->callback)
341 pp->callback(pp->callback_data, b, to_play, &pd);
342 if (pd.total == 0)
343 return pass;
344 int ignores[1] = { pass };
345 coord_t c = probdist_pick(&pd, ignores);
346 return c;
349 #endif
351 void
352 playout_elo_assess(struct playout_policy *p, struct prior_map *map, int games)
354 struct elo_policy *pp = p->data;
355 probdist_alloca(pd, map->b);
357 int moves;
358 moves = elo_get_probdist(p, &pp->assess, map->b, map->to_play, &pd);
360 /* It is a question how to transform the gamma to won games; we use
361 * a naive approach currently, but not sure how well it works. */
362 /* TODO: Try sqrt(p), atan(p)/pi*2. */
364 double pd_best = 0;
365 if (pp->assess_eval == EAV_BEST) {
366 for (int f = 0; f < map->b->flen; f++) {
367 double pd_one = fixp_to_double(probdist_one(&pd, map->b->f[f]));
368 if (pd_one > pd_best)
369 pd_best = pd_one;
372 double pd_total = fixp_to_double(probdist_total(&pd));
374 for (int f = 0; f < map->b->flen; f++) {
375 coord_t c = map->b->f[f];
376 if (!map->consider[c])
377 continue;
379 double pd_one = fixp_to_double(probdist_one(&pd, c));
380 double val = 0;
381 switch (pp->assess_eval) {
382 case EAV_TOTAL:
383 val = pd_one / pd_total;
384 break;
385 case EAV_BEST:
386 val = pd_one / pd_best;
387 break;
388 default:
389 assert(0);
392 switch (pp->assess_transform) {
393 case EAT_LINEAR:
394 val = val;
395 break;
396 case EAT_ATAN:
397 val = atan(val)/M_PI;
398 break;
399 default:
400 assert(0);
403 add_prior_value(map, c, val, games);
407 void
408 playout_elo_done(struct playout_policy *p)
410 struct elo_policy *pp = p->data;
411 features_gamma_done(pp->choose.fg);
412 features_gamma_done(pp->assess.fg);
416 void
417 playout_elo_callback(struct playout_policy *p, playout_elo_callbackp callback, void *data)
419 struct elo_policy *pp = p->data;
420 pp->callback = callback;
421 pp->callback_data = data;
424 struct playout_policy *
425 playout_elo_init(char *arg, struct board *b)
427 struct playout_policy *p = calloc2(1, sizeof(*p));
428 struct elo_policy *pp = calloc2(1, sizeof(*pp));
429 p->data = pp;
430 p->choose = playout_elo_choose;
431 p->assess = playout_elo_assess;
432 p->done = playout_elo_done;
434 const char *gammafile = features_gamma_filename;
435 /* Some defaults based on the table in Remi Coulom's paper. */
436 pp->selfatari = 0.06;
438 struct pattern_config pc = DEFAULT_PATTERN_CONFIG;
439 int xspat = -1;
440 bool precise_selfatari = false;
442 if (arg) {
443 char *optspec, *next = arg;
444 while (*next) {
445 optspec = next;
446 next += strcspn(next, ":");
447 if (*next) { *next++ = 0; } else { *next = 0; }
449 char *optname = optspec;
450 char *optval = strchr(optspec, '=');
451 if (optval) *optval++ = 0;
453 if (!strcasecmp(optname, "selfatari") && optval) {
454 pp->selfatari = atof(optval);
455 } else if (!strcasecmp(optname, "precisesa")) {
456 /* Use precise self-atari detection within
457 * fast patterns. */
458 precise_selfatari = !optval || atoi(optval);
459 } else if (!strcasecmp(optname, "gammafile") && optval) {
460 /* patterns.gamma by default. We use this,
461 * and need also ${gammafile}f (e.g.
462 * patterns.gammaf) for fast (MC) features. */
463 gammafile = strdup(optval);
464 } else if (!strcasecmp(optname, "xspat") && optval) {
465 /* xspat==0: don't match spatial features
466 * xspat==1: match *only* spatial features */
467 xspat = atoi(optval);
468 } else if (!strcasecmp(optname, "assess_eval") && optval) {
469 /* Evaluation method for prior node value
470 * assessment. */
471 if (!strcasecmp(optval, "total")) {
472 /* Proportion prob/totprob. */
473 pp->assess_eval = EAV_TOTAL;
474 } else if (!strcasecmp(optval, "best")) {
475 /* Proportion prob/bestprob. */
476 pp->assess_eval = EAV_BEST;
477 } else {
478 fprintf(stderr, "playout-elo: Invalid eval mode %s\n", optval);
479 exit(1);
481 } else if (!strcasecmp(optname, "assess_transform") && optval) {
482 /* Transformation of evaluation for prior
483 * node value assessment. */
484 if (!strcasecmp(optval, "linear")) {
485 /* No additional transformation. */
486 pp->assess_transform = EAT_LINEAR;
487 } else if (!strcasecmp(optval, "atan")) {
488 /* atan-shape transformation;
489 * pumps up low values. */
490 pp->assess_transform = EAT_ATAN;
491 } else {
492 fprintf(stderr, "playout-elo: Invalid eval mode %s\n", optval);
493 exit(1);
495 } else {
496 fprintf(stderr, "playout-elo: Invalid policy argument %s or missing value\n", optname);
497 exit(1);
502 pc.spat_dict = spatial_dict_init(false);
504 pp->assess.pc = pc;
505 pp->assess.fg = features_gamma_init(&pp->assess.pc, gammafile);
506 memcpy(pp->assess.ps, PATTERN_SPEC_MATCHALL, sizeof(pattern_spec));
507 for (int i = 0; i < FEAT_MAX; i++)
508 if ((xspat == 0 && i == FEAT_SPATIAL) || (xspat == 1 && i != FEAT_SPATIAL))
509 pp->assess.ps[i] = 0;
511 /* In playouts, we need to operate with much smaller set of features
512 * in order to keep reasonable speed. */
513 /* TODO: Configurable. */ /* TODO: Tune. */
514 pp->choose.pc = FAST_PATTERN_CONFIG;
515 pp->choose.pc.spat_dict = pc.spat_dict;
516 char cgammafile[256]; strcpy(stpcpy(cgammafile, gammafile), "f");
517 pp->choose.fg = features_gamma_init(&pp->choose.pc, cgammafile);
518 memcpy(pp->choose.ps, PATTERN_SPEC_MATCHFAST, sizeof(pattern_spec));
519 for (int i = 0; i < FEAT_MAX; i++)
520 if ((xspat == 0 && i == FEAT_SPATIAL) || (xspat == 1 && i != FEAT_SPATIAL))
521 pp->choose.ps[i] = 0;
522 if (precise_selfatari) {
523 pp->choose.ps[FEAT_SELFATARI] &= ~(1<<PF_SELFATARI_STUPID);
524 pp->choose.ps[FEAT_SELFATARI] |= (1<<PF_SELFATARI_SMART);
526 board_gamma_set(b, pp->choose.fg, precise_selfatari);
528 return p;