1 /* Playout player based on probability distribution generated over
2 * the available moves. */
4 /* We use the ELO-based (Coulom, 2007) approach, where each board
5 * feature (matched pattern, self-atari, capture, MC owner?, ...)
6 * is pre-assigned "playing strength" (gamma).
8 * Then, the problem of choosing a move is basically a team
9 * competition in ELO terms - each spot is represented by a team
10 * of features appearing there; the team gamma is product of feature
11 * gammas. The team gammas make for a probability distribution over the available moves.
14 * We use the general pattern classifier that will find the features
15 * for us, and external datasets that can be harvested from a set
16 * of game records (see the HACKING file for details): patterns.spat
17 * as a dictionary of spatial stone configurations, and patterns.gamma
18 * with strengths of particular features. */
29 #include "patternsp.h"
31 #include "playout/elo.h"
34 #include "uct/prior.h"
/* Policy-local debug-level test: forwards to DEBUGL_() with the debug
 * level read from `p->debug_level`, so it can only be used where a
 * variable named `p` with a debug_level member is in scope. */
36 #define PLDEBUGL(n) DEBUGL_(p->debug_level, n)
39 /* Note that the context can be shared by multiple threads! */
/* NOTE(review): the enclosing struct headers are not visible in this
 * excerpt. Judging by the accesses further down (ps->pc, ps->fg on a
 * struct patternset pointer; pp->choose, pp->assess on a struct
 * elo_policy pointer), these look like members of those two structs -
 * confirm against the full file. */
/* Pattern matcher configuration; handed to pattern_match() as &ps->pc. */
43 struct pattern_config pc
;
/* Feature gamma table; handed to feature_gamma() as ps->fg. */
44 struct features_gamma
*fg
;
/* Two pattern sets: `choose` is passed to elo_get_probdist() by
 * playout_elo_choose(), `assess` by playout_elo_assess(). */
49 struct patternset choose
, assess
;
53 /* This is the core of the policy - initializes and constructs the
54 * probability distribution over the move candidates. */
/* Fills `pd` with one weight per candidate coordinate for `to_play` on
 * board `b`, using pattern set `ps` (matcher config ps->pc, spec ps->ps
 * and gamma table ps->fg). Every entry of b->f[] that survives the
 * checks below starts at weight 1 and is then multiplied by the gamma
 * of each feature the pattern matcher reports for that move.
 * NOTE(review): the return type, some braces and the return statement
 * are not visible in this excerpt; playout_elo_assess() assigns the
 * result to `moves`, so a value is returned - confirm what it counts. */
57 elo_get_probdist(struct playout_policy
*p
, struct patternset
*ps
, struct board
*b
, enum stone to_play
, struct probdist
*pd
)
59 //struct elo_policy *pp = p->data;
/* Reset the output distribution; it is sized by board_size2(b). */
62 probdist_init(pd
, board_size2(b
));
64 /* First, assign per-point probabilities. */
/* Walk all b->flen entries of b->f[], treating each as a move by to_play. */
66 for (int f
= 0; f
< b
->flen
; f
++) {
67 struct move m
= { .coord
= b
->f
[f
], .color
= to_play
};
69 /* Skip pass (for now)? */
72 //fprintf(stderr, "<%d> %s\n", f, coord2sstr(m.coord, b));
74 /* Skip invalid moves. */
75 if (!board_is_valid_move(b
, &m
))
/* NOTE(review): the statement guarded by the check above is not visible
 * here; per the preceding comment it presumably skips this candidate. */
78 /* We shall never fill our own single-point eyes. */
79 /* XXX: In some rare situations, this prunes the best move:
80 * Bulk-five nakade with eye at 1-1 point. */
81 if (board_is_one_point_eye(b
, &m
.coord
, to_play
)) {
/* NOTE(review): the body and closing brace of the eye check are not
 * visible here; presumably the candidate is skipped - confirm. */
86 /* Each valid move starts with gamma 1. */
87 probdist_add(pd
, m
.coord
, 1.f
);
89 /* Some easy features: */
90 /* XXX: We just disable them for now since we call the
91 * pattern matcher; you need the gammas file. */
/* NOTE(review): `pp` is only declared in the commented-out line near the
 * top of this function, so the self-atari statement below cannot compile
 * as shown; per the XXX note it presumably sits in a disabled region
 * whose markers are not visible in this excerpt - confirm. */
93 if (is_bad_selfatari(b
, to_play
, m
.coord
))
94 probdist_mul(pd
, m
.coord
, pp
->selfatari
);
97 /* Match pattern features: */
/* NOTE(review): `p` is used as a struct below (&p, p.n, p.f[]) although
 * the parameter `p` is a pointer; presumably a local pattern object
 * named `p` is declared on a line not shown, shadowing the parameter -
 * confirm. */
99 pattern_match(&ps
->pc
, ps
->ps
, &p
, b
, &m
);
/* One multiplication per matched feature: p.n features in p.f[]. */
100 for (int i
= 0; i
< p
.n
; i
++) {
101 /* Multiply together gammas of all pattern features. */
102 float gamma
= feature_gamma(ps
->fg
, &p
.f
[i
], NULL
);
103 //char buf[256] = ""; feature2str(buf, &p.f[i]);
104 //fprintf(stderr, "<%d> %s feat %s gamma %f\n", f, coord2sstr(m.coord, b), buf, gamma);
105 probdist_mul(pd
, m
.coord
, gamma
);
107 //fprintf(stderr, "<%d> %s %f\n", f, coord2sstr(m.coord, b), pd->moves[m.coord]);
/* Playout move selection: builds the move distribution for board `b` and
 * player `to_play` with the policy's `choose` pattern set, then draws one
 * coordinate from it with probdist_pick().
 * NOTE(review): the return type, the declaration of `pd` and the return
 * statement are not visible in this excerpt; presumably `c` is returned -
 * confirm. */
115 playout_elo_choose(struct playout_policy
*p
, struct board
*b
, enum stone to_play
)
/* Policy-private state is stored in p->data. */
117 struct elo_policy
*pp
= p
->data
;
/* The value returned by elo_get_probdist() is not used here. */
119 elo_get_probdist(p
, &pp
->choose
, b
, to_play
, &pd
);
120 coord_t c
= probdist_pick(&pd
);
/* Prior assessment: builds the move distribution for map->b and
 * map->to_play with the policy's `assess` pattern set, then feeds each
 * considered coordinate's share of the total weight into the prior map
 * via add_prior_value(), weighted by `games`.
 * NOTE(review): the return type and the declarations of `pd` and `moves`
 * are not visible in this excerpt. */
126 playout_elo_assess(struct playout_policy
*p
, struct prior_map
*map
, int games
)
/* Policy-private state is stored in p->data. */
128 struct elo_policy
*pp
= p
->data
;
/* NOTE(review): `moves` is assigned here but not read in the visible code. */
132 moves
= elo_get_probdist(p
, &pp
->assess
, map
->b
, map
->to_play
, &pd
);
134 /* It is a question how to transform the gamma to won games; we use
135 * a naive approach currently, but not sure how well it works. */
136 /* TODO: Try sqrt(p), atan(p)/pi*2. */
/* Visit every entry of map->b->f[] (map->b->flen of them). */
138 for (int f
= 0; f
< map
->b
->flen
; f
++) {
139 coord_t c
= map
->b
->f
[f
];
140 if (!map
->consider
[c
])
/* NOTE(review): the statement guarded by the check above is not visible
 * here; presumably coordinates with consider[c] unset are skipped. */
/* The prior value is the move's weight normalised by the distribution
 * total. NOTE(review): if pd.total can be zero (no candidate received
 * any weight) this division is by zero - confirm that cannot happen. */
142 add_prior_value(map
, c
, pd
.moves
[c
] / pd
.total
, games
);
/* Creates the ELO playout policy object.
 *
 * Allocates a zeroed struct playout_policy and a zeroed struct elo_policy,
 * installs playout_elo_choose()/playout_elo_assess() as callbacks, applies
 * defaults (selfatari gamma 0.06, features_gamma_filename, the default
 * pattern config) and then parses `arg`.
 *
 * `arg` is a ':'-separated list of "name=value" specs and is modified in
 * place (separators are overwritten with 0), so it must be writable.
 * Names are compared case-insensitively; the names recognised in the
 * visible code are "selfatari" and "gammafile", both requiring a value.
 *
 * NOTE(review): this excerpt omits several lines (opening brace, the
 * option loop header and the assignment of optspec, the tail of the
 * function), so the hand-over of `pp` to `p` and the return statement are
 * not visible here. */
149 struct playout_policy
*
150 playout_elo_init(char *arg
)
/* NOTE(review): neither calloc() result is checked in the visible code. */
152 struct playout_policy
*p
= calloc(1, sizeof(*p
));
153 struct elo_policy
*pp
= calloc(1, sizeof(*pp
));
155 p
->choose
= playout_elo_choose
;
156 p
->assess
= playout_elo_assess
;
158 const char *gammafile
= features_gamma_filename
;
159 /* Some defaults based on the table in Remi Coulom's paper. */
160 pp
->selfatari
= 0.06;
162 struct pattern_config pc
= DEFAULT_PATTERN_CONFIG
;
165 char *optspec
, *next
= arg
;
/* Advance `next` to the end of the current spec (next ':' or the
 * terminating 0) and cut the spec off there. */
168 next
+= strcspn(next
, ":");
169 if (*next
) { *next
++ = 0; } else { *next
= 0; }
/* Split the spec at the first '='; optval stays NULL when there is none. */
171 char *optname
= optspec
;
172 char *optval
= strchr(optspec
, '=');
173 if (optval
) *optval
++ = 0;
175 if (!strcasecmp(optname
, "selfatari") && optval
) {
/* NOTE(review): atof() gives no indication of malformed input. */
176 pp
->selfatari
= atof(optval
);
177 } else if (!strcasecmp(optname
, "gammafile") && optval
) {
178 /* patterns.gamma by default. We use this,
179 * and need also ${gammafile}f (e.g.
180 * patterns.gammaf) for fast (MC) features. */
/* NOTE(review): the strdup() result is neither checked nor freed in the
 * visible code. */
181 gammafile
= strdup(optval
);
/* NOTE(review): the `else` introducing this diagnostic is not visible in
 * the excerpt; presumably this is the fallback for unknown options. */
183 fprintf(stderr
, "playout-elo: Invalid policy argument %s or missing value\n", optname
);
/* One spatial dictionary is created here and also referenced from the
 * `choose` config further down. */
189 pc
.spat_dict
= spatial_dict_init(false);
/* NOTE(review): no assignment to pp->assess.pc is visible before its
 * address is used here; presumably it is set from `pc` on a line not
 * shown - confirm. */
192 pp
->assess
.fg
= features_gamma_init(&pp
->assess
.pc
, gammafile
);
193 memcpy(pp
->assess
.ps
, PATTERN_SPEC_MATCHALL
, sizeof(pattern_spec
));
195 /* In playouts, we need to operate with much smaller set of features
196 * in order to keep reasonable speed. */
197 /* TODO: Configurable. */ /* TODO: Tune. */
198 pp
->choose
.pc
= FAST_PATTERN_CONFIG
;
199 pp
->choose
.pc
.spat_dict
= pc
.spat_dict
;
/* Build "<gammafile>f" as the gamma file name for the fast feature set.
 * NOTE(review): stpcpy()/strcpy() do not bound the copy; a gammafile
 * option of 255 characters or more overflows this 256-byte buffer. */
200 char cgammafile
[256]; strcpy(stpcpy(cgammafile
, gammafile
), "f");
201 pp
->choose
.fg
= features_gamma_init(&pp
->choose
.pc
, cgammafile
);
202 memcpy(pp
->choose
.ps
, PATTERN_SPEC_MATCHFAST
, sizeof(pattern_spec
));