Support BOARD_GAMMA without BOARD_TRAITS
[pachi/ann.git] / playout / elo.c
blob5f736882f8535c6089cb3f7933e07ac13c84c0c7
1 /* Playout player based on probability distribution generated over
2 * the available moves. */
4 /* We use the ELO-based (Coulom, 2007) approach, where each board
5 * feature (matched pattern, self-atari, capture, MC owner?, ...)
6 * is pre-assigned "playing strength" (gamma).
8 * Then, the problem of choosing a move is basically a team
9 * competition in ELO terms - each spot is represented by a team
10 * of features appearing there; the team gamma is product of feature
11 * gammas. The team gammas make for a probability distribution of
12 * moves to be played.
14 * We use the general pattern classifier that will find the features
15 * for us, and external datasets that can be harvested from a set
16 * of game records (see the HACKING file for details): patterns.spat
17 * as a dictionary of spatial stone configurations, and patterns.gamma
18 * with strengths of particular features. */
20 /* TODO: Counter-captures are not appreciated properly. This seems to be
21 * a major strength problem. */
23 #include <assert.h>
24 #include <math.h>
25 #include <stdio.h>
26 #include <stdlib.h>
28 //#define DEBUG
29 #include "board.h"
30 #include "debug.h"
31 #include "fixp.h"
32 #include "pattern.h"
33 #include "patternsp.h"
34 #include "playout.h"
35 #include "playout/elo.h"
36 #include "random.h"
37 #include "tactics.h"
38 #include "uct/prior.h"
40 #define PLDEBUGL(n) DEBUGL_(p->debug_level, n)
43 /* Note that the context can be shared by multiple threads! */
45 struct patternset {
46 pattern_spec ps;
47 struct pattern_config pc;
48 struct features_gamma *fg;
51 struct elo_policy {
52 bool assess_fastpat;
53 float selfatari;
54 struct patternset choose, assess;
55 playout_elo_callbackp callback; void *callback_data;
57 enum {
58 EAV_TOTAL,
59 EAV_BEST,
60 } assess_eval;
61 enum {
62 EAT_LINEAR,
63 EAT_ATAN,
64 EAT_SIGMOID,
65 } assess_transform;
66 double assess_sigmb;
70 /* This is the core of the policy - initializes and constructs the
71 * probability distribution over the move candidates. */
73 int
74 elo_get_probdist(struct playout_policy *p, struct patternset *ps, struct board *b, enum stone to_play, struct probdist *pd)
76 //struct elo_policy *pp = p->data;
77 int moves = 0;
79 /* First, assign per-point probabilities. */
81 for (int f = 0; f < b->flen; f++) {
82 struct move m = { .coord = b->f[f], .color = to_play };
84 /* Skip pass (for now)? */
85 if (is_pass(m.coord)) {
86 skip_move:
87 probdist_set(pd, m.coord, 0);
88 continue;
90 if (PLDEBUGL(7))
91 fprintf(stderr, "<%d> %s\n", f, coord2sstr(m.coord, b));
93 /* Skip invalid moves. */
94 if (!board_is_valid_move(b, &m))
95 goto skip_move;
97 /* We shall never fill our own single-point eyes. */
98 /* XXX: In some rare situations, this prunes the best move:
99 * Bulk-five nakade with eye at 1-1 point. */
100 if (board_is_one_point_eye(b, m.coord, to_play)) {
101 goto skip_move;
104 moves++;
105 /* Each valid move starts with gamma 1. */
106 double g = 1.f;
108 /* Some easy features: */
109 /* XXX: We just disable them for now since we call the
110 * pattern matcher; you need the gammas file. */
111 #if 0
112 if (is_bad_selfatari(b, to_play, m.coord))
113 g *= pp->selfatari;
114 #endif
116 /* Match pattern features: */
117 struct pattern pat;
118 pattern_match(&ps->pc, ps->ps, &pat, b, &m);
119 for (int i = 0; i < pat.n; i++) {
120 /* Multiply together gammas of all pattern features. */
121 double gamma = feature_gamma(ps->fg, &pat.f[i], NULL);
122 if (PLDEBUGL(7)) {
123 char buf[256] = ""; feature2str(buf, &pat.f[i]);
124 fprintf(stderr, "<%d> %s feat %s gamma %f\n", f, coord2sstr(m.coord, b), buf, gamma);
126 g *= gamma;
129 probdist_set(pd, m.coord, double_to_fixp(g));
130 if (PLDEBUGL(7))
131 fprintf(stderr, "<%d> %s %f (E %f)\n", f, coord2sstr(m.coord, b), fixp_to_double(probdist_one(pd, m.coord)), g);
134 return moves;
138 struct lprobdist {
139 int n;
140 #define LPD_MAX 8
141 coord_t coords[LPD_MAX];
142 fixp_t items[LPD_MAX];
143 fixp_t total;
145 /* Backups of original totals for restoring. */
146 fixp_t btotal;
147 fixp_t browtotals_v[10];
148 int browtotals_i[10];
149 int browtotals_n;
152 #if defined(BOARD_GAMMA) && defined(BOARD_TRAITS)
154 static void
155 elo_check_probdist(struct playout_policy *p, struct board *b, enum stone to_play, struct probdist *pd, int *ignores, struct lprobdist *lpd, coord_t lc)
157 #if 0
158 #define PROBDIST_EPSILON double_to_fixp(0.01)
159 struct elo_policy *pp = p->data;
160 if (pd->total == 0)
161 return;
163 /* Compare to the manually created distribution. */
164 /* XXX: This is now broken if callback is used. */
166 probdist_alloca(pdx, b);
167 elo_get_probdist(p, &pp->choose, b, to_play, &pdx);
168 for (int i = 0; i < b->flen; i++) {
169 coord_t c = b->f[i];
170 if (is_pass(c)) continue;
171 if (c == b->ko.coord) continue;
172 fixp_t val = pd->items[c];
173 if (!is_pass(lc) && coord_is_8adjecent(lc, c, b))
174 for (int j = 0; j < lpd->n; j++)
175 if (lpd->coords[j] == c) {
176 val = lpd->items[j];
177 probdist_mute(&pdx, c);
180 if (abs(pdx.items[c] - val) < PROBDIST_EPSILON)
181 continue;
182 printf("[%s %d] manual %f board %f (base %f) ", coord2sstr(c, b), b->pat3[c], fixp_to_double(pdx.items[c]), fixp_to_double(val), fixp_to_double(pd->items[c]));
183 board_gamma_update(b, c, to_play);
184 printf("plainboard %f\n", fixp_to_double(pd->items[c]));
185 assert(0);
187 for (int r = 0; r < board_size(b); r++) {
188 if (abs(pdx.rowtotals[r] - pd->rowtotals[r]) < PROBDIST_EPSILON)
189 continue;
190 fprintf(stderr, "row %d: manual %f board %f\n", r, fixp_to_double(pdx.rowtotals[r]), fixp_to_double(pd->rowtotals[r]));
191 assert(0);
193 assert(abs(pdx.total - pd->total) < PROBDIST_EPSILON);
194 #undef PROBDIST_EPSILON
195 #endif
198 coord_t
199 playout_elo_choose(struct playout_policy *p, struct board *b, enum stone to_play)
201 struct elo_policy *pp = p->data;
202 /* The base board probdist. */
203 struct probdist *pd = &b->prob[to_play - 1];
204 /* The list of moves we do not consider in pd. */
205 int ignores[10]; int ignores_n = 0;
206 /* The list of local moves; we consider these separately. */
207 struct lprobdist lpd = { .n = 0, .total = 0, .btotal = pd->total, .browtotals_n = 0 };
209 /* The engine might want to adjust our probdist. */
210 if (pp->callback)
211 pp->callback(pp->callback_data, b, to_play, pd);
213 if (PLDEBUGL(5)) {
214 fprintf(stderr, "pd total pre %f lpd %f\n", fixp_to_double(pd->total), fixp_to_double(lpd.total));
217 #define ignore_move(c_) do { \
218 ignores[ignores_n++] = c_; \
219 if (ignores_n > 1 && ignores[ignores_n - 1] < ignores[ignores_n - 2]) { \
220 /* Keep ignores[] sorted. We abuse the fact that we know \
221 * only one item can be out-of-order. */ \
222 coord_t cc = ignores[ignores_n - 2]; \
223 ignores[ignores_n - 2] = ignores[ignores_n - 1]; \
224 ignores[ignores_n - 1] = cc; \
226 int rowi = coord_y(c_, pd->b); \
227 lpd.browtotals_i[lpd.browtotals_n] = rowi; \
228 lpd.browtotals_v[lpd.browtotals_n++] = pd->rowtotals[rowi]; \
229 probdist_mute(pd, c_); \
230 if (PLDEBUGL(6)) \
231 fprintf(stderr, "ignored move %s(%f) => tot pd %f lpd %f\n", coord2sstr(c_, pd->b), fixp_to_double(pd->items[c_]), fixp_to_double(pd->total), fixp_to_double(lpd.total)); \
232 } while (0)
234 /* Make sure ko-prohibited move does not get picked. */
235 if (!is_pass(b->ko.coord)) {
236 assert(b->ko.color == to_play);
237 ignore_move(b->ko.coord);
240 /* Contiguity detection. */
241 if (!is_pass(b->last_move.coord)) {
242 foreach_8neighbor(b, b->last_move.coord) {
243 if (c == b->ko.coord)
244 continue; // already ignored
245 if (board_at(b, c) != S_NONE) {
246 assert(probdist_one(pd, c) == 0);
247 continue;
249 ignore_move(c);
251 fixp_t val = double_to_fixp(fixp_to_double(probdist_one(pd, c)) * b->gamma->gamma[FEAT_CONTIGUITY][1]);
252 lpd.coords[lpd.n] = c;
253 lpd.items[lpd.n++] = val;
254 lpd.total += val;
255 } foreach_8neighbor_end;
258 ignores[ignores_n] = pass;
259 if (PLDEBUGL(5))
260 fprintf(stderr, "pd total post %f lpd %f\n", fixp_to_double(pd->total), fixp_to_double(lpd.total));
262 /* Verify sanity, possibly. */
263 elo_check_probdist(p, b, to_play, pd, ignores, &lpd, b->last_move.coord);
265 /* Pick a move. */
266 coord_t c = pass;
267 fixp_t stab = fast_irandom(lpd.total + pd->total);
268 if (PLDEBUGL(5))
269 fprintf(stderr, "stab %f / (%f + %f)\n", fixp_to_double(stab), fixp_to_double(lpd.total), fixp_to_double(pd->total));
270 if (stab < lpd.total) {
271 /* Local probdist. */
272 if (PLDEBUGL(6)) {
273 /* Some debug prints. */
274 fixp_t tot = 0;
275 for (int i = 0; i < lpd.n; i++) {
276 tot += lpd.items[i];
277 struct pattern p;
278 struct move m = { .color = to_play, .coord = lpd.coords[i] };
279 if (board_at(b, m.coord) != S_NONE) {
280 assert(lpd.items[i] == 0);
281 continue;
283 pattern_match(&pp->choose.pc, pp->choose.ps, &p, b, &m);
284 char s[256] = ""; pattern2str(s, &p);
285 fprintf(stderr, "coord %s <%f> [tot %f] %s (p3:%d)\n",
286 coord2sstr(lpd.coords[i], b), fixp_to_double(lpd.items[i]),
287 fixp_to_double(tot), s,
288 pattern3_by_spatial(pp->choose.pc.spat_dict, b->pat3[lpd.coords[i]]));
291 for (int i = 0; i < lpd.n; i++) {
292 if (stab <= lpd.items[i]) {
293 c = lpd.coords[i];
294 break;
296 stab -= lpd.items[i];
298 if (is_pass(c)) {
299 fprintf(stderr, "elo: local overstab [%f]\n", fixp_to_double(stab));
300 abort();
303 } else if (pd->total > 0) {
304 /* Global probdist. */
305 /* XXX: We re-stab inside. */
306 c = probdist_pick(pd, ignores);
308 } else {
309 if (PLDEBUGL(5))
310 fprintf(stderr, "ding!\n");
311 c = pass;
314 /* Repair the damage. */
315 if (pp->callback) {
316 /* XXX: Do something less horribly inefficient
317 * than just recomputing the whole pd. */
318 pd->total = 0;
319 for (int i = 0; i < board_size(pd->b); i++)
320 pd->rowtotals[i] = 0;
321 for (int i = 0; i < b->flen; i++) {
322 pd->items[b->f[i]] = 0;
323 board_gamma_update(b, b->f[i], to_play);
325 assert(pd->total == lpd.btotal);
327 } else {
328 pd->total = lpd.btotal;
329 /* If we touched a row multiple times (and we sure will),
330 * the latter value is obsolete; but since we go through
331 * the backups in reverse order, all is good. */
332 for (int j = lpd.browtotals_n - 1; j >= 0; j--)
333 pd->rowtotals[lpd.browtotals_i[j]] = lpd.browtotals_v[j];
335 return c;
338 #else
340 coord_t
341 playout_elo_choose(struct playout_policy *p, struct board *b, enum stone to_play)
343 struct elo_policy *pp = p->data;
344 probdist_alloca(pd, b);
345 elo_get_probdist(p, &pp->choose, b, to_play, &pd);
346 if (pp->callback)
347 pp->callback(pp->callback_data, b, to_play, &pd);
348 if (pd.total == 0)
349 return pass;
350 int ignores[1] = { pass };
351 coord_t c = probdist_pick(&pd, ignores);
352 return c;
355 #endif
357 void
358 playout_elo_assess(struct playout_policy *p, struct prior_map *map, int games)
360 struct elo_policy *pp = p->data;
361 probdist_alloca(pd, map->b);
363 int moves;
364 moves = elo_get_probdist(p, &pp->assess, map->b, map->to_play, &pd);
366 /* It is a question how to transform the gamma to won games; we use
367 * a naive approach currently, but not sure how well it works. */
368 /* TODO: Try sqrt(p), atan(p)/pi*2. */
370 double pd_best = 0;
371 if (pp->assess_eval == EAV_BEST) {
372 for (int f = 0; f < map->b->flen; f++) {
373 double pd_one = fixp_to_double(probdist_one(&pd, map->b->f[f]));
374 if (pd_one > pd_best)
375 pd_best = pd_one;
378 double pd_total = fixp_to_double(probdist_total(&pd));
380 for (int f = 0; f < map->b->flen; f++) {
381 coord_t c = map->b->f[f];
382 if (!map->consider[c])
383 continue;
385 double pd_one = fixp_to_double(probdist_one(&pd, c));
386 double val = 0;
387 switch (pp->assess_eval) {
388 case EAV_TOTAL:
389 val = pd_one / pd_total;
390 break;
391 case EAV_BEST:
392 val = pd_one / pd_best;
393 break;
394 default:
395 assert(0);
398 switch (pp->assess_transform) {
399 case EAT_LINEAR:
400 val = val;
401 break;
402 case EAT_ATAN:
403 val = atan(val)/M_PI;
404 break;
405 case EAT_SIGMOID:
406 val = 1.0 / (1.0 + exp(-pp->assess_sigmb * (val - 0.5)));
407 break;
408 default:
409 assert(0);
412 add_prior_value(map, c, val, games);
416 void
417 playout_elo_done(struct playout_policy *p)
419 struct elo_policy *pp = p->data;
420 features_gamma_done(pp->choose.fg);
421 features_gamma_done(pp->assess.fg);
425 void
426 playout_elo_callback(struct playout_policy *p, playout_elo_callbackp callback, void *data)
428 struct elo_policy *pp = p->data;
429 pp->callback = callback;
430 pp->callback_data = data;
433 struct playout_policy *
434 playout_elo_init(char *arg, struct board *b)
436 struct playout_policy *p = calloc2(1, sizeof(*p));
437 struct elo_policy *pp = calloc2(1, sizeof(*pp));
438 p->data = pp;
439 p->choose = playout_elo_choose;
440 p->assess = playout_elo_assess;
441 p->done = playout_elo_done;
443 const char *gammafile = features_gamma_filename;
444 pp->assess_sigmb = 10.0;
445 /* Some defaults based on the table in Remi Coulom's paper. */
446 pp->selfatari = 0.06;
448 struct pattern_config pc = DEFAULT_PATTERN_CONFIG;
449 int xspat = -1;
450 bool precise_selfatari = false;
452 if (arg) {
453 char *optspec, *next = arg;
454 while (*next) {
455 optspec = next;
456 next += strcspn(next, ":");
457 if (*next) { *next++ = 0; } else { *next = 0; }
459 char *optname = optspec;
460 char *optval = strchr(optspec, '=');
461 if (optval) *optval++ = 0;
463 if (!strcasecmp(optname, "selfatari") && optval) {
464 pp->selfatari = atof(optval);
465 } else if (!strcasecmp(optname, "precisesa")) {
466 /* Use precise self-atari detection within
467 * fast patterns. */
468 precise_selfatari = !optval || atoi(optval);
469 } else if (!strcasecmp(optname, "gammafile") && optval) {
470 /* patterns.gamma by default. We use this,
471 * and need also ${gammafile}f (e.g.
472 * patterns.gammaf) for fast (MC) features. */
473 gammafile = strdup(optval);
474 } else if (!strcasecmp(optname, "xspat") && optval) {
475 /* xspat==0: don't match spatial features
476 * xspat==1: match *only* spatial features */
477 xspat = atoi(optval);
478 } else if (!strcasecmp(optname, "assess_fastpat")) {
479 /* Use just fast pattern set even for the
480 * node prior value assessment. */
481 pp->assess_fastpat = !optval || atoi(optval);
482 } else if (!strcasecmp(optname, "assess_sigmb") && optval) {
483 pp->assess_sigmb = atof(optval);
484 } else if (!strcasecmp(optname, "assess_eval") && optval) {
485 /* Evaluation method for prior node value
486 * assessment. */
487 if (!strcasecmp(optval, "total")) {
488 /* Proportion prob/totprob. */
489 pp->assess_eval = EAV_TOTAL;
490 } else if (!strcasecmp(optval, "best")) {
491 /* Proportion prob/bestprob. */
492 pp->assess_eval = EAV_BEST;
493 } else {
494 fprintf(stderr, "playout-elo: Invalid eval mode %s\n", optval);
495 exit(1);
497 } else if (!strcasecmp(optname, "assess_transform") && optval) {
498 /* Transformation of evaluation for prior
499 * node value assessment. */
500 if (!strcasecmp(optval, "linear")) {
501 /* No additional transformation. */
502 pp->assess_transform = EAT_LINEAR;
503 } else if (!strcasecmp(optval, "atan")) {
504 /* atan-shape transformation;
505 * pumps up low values. */
506 pp->assess_transform = EAT_ATAN;
507 } else if (!strcasecmp(optval, "sigmoid")) {
508 /* Sigmoid transformation
509 * according to assess_sigmb. */
510 pp->assess_transform = EAT_SIGMOID;
511 } else {
512 fprintf(stderr, "playout-elo: Invalid eval mode %s\n", optval);
513 exit(1);
515 } else {
516 fprintf(stderr, "playout-elo: Invalid policy argument %s or missing value\n", optname);
517 exit(1);
522 pc.spat_dict = spatial_dict_init(false);
524 /* In playouts, we need to operate with much smaller set of features
525 * in order to keep reasonable speed. */
526 /* TODO: Configurable. */ /* TODO: Tune. */
527 pp->choose.pc = FAST_PATTERN_CONFIG;
528 pp->choose.pc.spat_dict = pc.spat_dict;
529 char cgammafile[256]; strcpy(stpcpy(cgammafile, gammafile), "f");
530 pp->choose.fg = features_gamma_init(&pp->choose.pc, cgammafile);
531 memcpy(pp->choose.ps, PATTERN_SPEC_MATCHFAST, sizeof(pattern_spec));
532 for (int i = 0; i < FEAT_MAX; i++)
533 if ((xspat == 0 && i == FEAT_SPATIAL) || (xspat == 1 && i != FEAT_SPATIAL))
534 pp->choose.ps[i] = 0;
535 if (precise_selfatari) {
536 pp->choose.ps[FEAT_SELFATARI] &= ~(1<<PF_SELFATARI_STUPID);
537 pp->choose.ps[FEAT_SELFATARI] |= (1<<PF_SELFATARI_SMART);
539 board_gamma_set(b, pp->choose.fg, precise_selfatari);
541 if (pp->assess_fastpat) {
542 pp->assess = pp->choose;
543 pp->assess.fg = features_gamma_init(&pp->assess.pc, cgammafile);
544 } else {
545 /* More detailed set of features. */
546 pp->assess.pc = pc;
547 pp->assess.fg = features_gamma_init(&pp->assess.pc, gammafile);
548 memcpy(pp->assess.ps, PATTERN_SPEC_MATCHALL, sizeof(pattern_spec));
549 for (int i = 0; i < FEAT_MAX; i++)
550 if ((xspat == 0 && i == FEAT_SPATIAL) || (xspat == 1 && i != FEAT_SPATIAL))
551 pp->assess.ps[i] = 0;
554 return p;