1 /* Playout player based on probability distribution generated over
2 * the available moves. */
4 /* We use the ELO-based (Coulom, 2007) approach, where each board
5 * feature (matched pattern, self-atari, capture, MC owner?, ...)
6 * is pre-assigned "playing strength" (gamma).
8 * Then, the problem of choosing a move is basically a team
9 * competition in ELO terms - each spot is represented by a team
10 * of features appearing there; the team gamma is product of feature
11 * gammas. The team gammas make for a probability distribution of
14 * We use the general pattern classifier that will find the features
15 * for us, and external datasets that can be harvested from a set
16 * of game records (see the HACKING file for details): patterns.spat
17 * as a dictionary of spatial stone configurations, and patterns.gamma
18 * with strengths of particular features. */
20 /* TODO: Counter-captures are not appreciated properly. This seems to be
21 * a major strength problem. */
33 #include "patternsp.h"
35 #include "playout/elo.h"
37 #include "uct/prior.h"
39 #define PLDEBUGL(n) DEBUGL_(p->debug_level, n)
42 /* Note that the context can be shared by multiple threads! */
46 struct pattern_config pc
;
47 struct features_gamma
*fg
;
53 struct patternset choose
, assess
;
54 playout_elo_callbackp callback
; void *callback_data
;
69 /* This is the core of the policy - initializes and constructs the
70 * probability distribution over the move candidates. */
73 elo_get_probdist(struct playout_policy
*p
, struct patternset
*ps
, struct board
*b
, enum stone to_play
, struct probdist
*pd
)
75 //struct elo_policy *pp = p->data;
78 /* First, assign per-point probabilities. */
80 for (int f
= 0; f
< b
->flen
; f
++) {
81 struct move m
= { .coord
= b
->f
[f
], .color
= to_play
};
83 /* Skip pass (for now)? */
84 if (is_pass(m
.coord
)) {
86 probdist_set(pd
, m
.coord
, 0);
90 fprintf(stderr
, "<%d> %s\n", f
, coord2sstr(m
.coord
, b
));
92 /* Skip invalid moves. */
93 if (!board_is_valid_move(b
, &m
))
96 /* We shall never fill our own single-point eyes. */
97 /* XXX: In some rare situations, this prunes the best move:
98 * Bulk-five nakade with eye at 1-1 point. */
99 if (board_is_one_point_eye(b
, m
.coord
, to_play
)) {
104 /* Each valid move starts with gamma 1. */
107 /* Some easy features: */
108 /* XXX: We just disable them for now since we call the
109 * pattern matcher; you need the gammas file. */
111 if (is_bad_selfatari(b
, to_play
, m
.coord
))
115 /* Match pattern features: */
117 pattern_match(&ps
->pc
, ps
->ps
, &pat
, b
, &m
);
118 for (int i
= 0; i
< pat
.n
; i
++) {
119 /* Multiply together gammas of all pattern features. */
120 double gamma
= feature_gamma(ps
->fg
, &pat
.f
[i
], NULL
);
122 char buf
[256] = ""; feature2str(buf
, &pat
.f
[i
]);
123 fprintf(stderr
, "<%d> %s feat %s gamma %f\n", f
, coord2sstr(m
.coord
, b
), buf
, gamma
);
128 probdist_set(pd
, m
.coord
, double_to_fixp(g
));
130 fprintf(stderr
, "<%d> %s %f (E %f)\n", f
, coord2sstr(m
.coord
, b
), fixp_to_double(probdist_one(pd
, m
.coord
)), g
);
140 coord_t coords
[LPD_MAX
];
141 fixp_t items
[LPD_MAX
];
144 /* Backups of original totals for restoring. */
146 fixp_t browtotals_v
[10];
147 int browtotals_i
[10];
151 #if defined(BOARD_GAMMA) && defined(BOARD_TRAITS)
154 elo_check_probdist(struct playout_policy
*p
, struct board
*b
, enum stone to_play
, struct probdist
*pd
, int *ignores
, struct lprobdist
*lpd
, coord_t lc
)
157 #define PROBDIST_EPSILON double_to_fixp(0.01)
158 struct elo_policy
*pp
= p
->data
;
162 /* Compare to the manually created distribution. */
163 /* XXX: This is now broken if callback is used. */
165 probdist_alloca(pdx
, b
);
166 elo_get_probdist(p
, &pp
->choose
, b
, to_play
, &pdx
);
167 for (int i
= 0; i
< b
->flen
; i
++) {
169 if (is_pass(c
)) continue;
170 if (c
== b
->ko
.coord
) continue;
171 fixp_t val
= pd
->items
[c
];
172 if (!is_pass(lc
) && coord_is_8adjecent(lc
, c
, b
))
173 for (int j
= 0; j
< lpd
->n
; j
++)
174 if (lpd
->coords
[j
] == c
) {
176 probdist_mute(&pdx
, c
);
179 if (abs(pdx
.items
[c
] - val
) < PROBDIST_EPSILON
)
181 printf("[%s %d] manual %f board %f (base %f) ", coord2sstr(c
, b
), b
->pat3
[c
], fixp_to_double(pdx
.items
[c
]), fixp_to_double(val
), fixp_to_double(pd
->items
[c
]));
182 board_gamma_update(b
, c
, to_play
);
183 printf("plainboard %f\n", fixp_to_double(pd
->items
[c
]));
186 for (int r
= 0; r
< board_size(b
); r
++) {
187 if (abs(pdx
.rowtotals
[r
] - pd
->rowtotals
[r
]) < PROBDIST_EPSILON
)
189 fprintf(stderr
, "row %d: manual %f board %f\n", r
, fixp_to_double(pdx
.rowtotals
[r
]), fixp_to_double(pd
->rowtotals
[r
]));
192 assert(abs(pdx
.total
- pd
->total
) < PROBDIST_EPSILON
);
193 #undef PROBDIST_EPSILON
198 playout_elo_choose(struct playout_policy
*p
, struct board
*b
, enum stone to_play
)
200 struct elo_policy
*pp
= p
->data
;
201 /* The base board probdist. */
202 struct probdist
*pd
= &b
->prob
[to_play
- 1];
203 /* The list of moves we do not consider in pd. */
204 int ignores
[10]; int ignores_n
= 0;
205 /* The list of local moves; we consider these separately. */
206 struct lprobdist lpd
= { .n
= 0, .total
= 0, .btotal
= pd
->total
, .browtotals_n
= 0 };
208 /* The engine might want to adjust our probdist. */
210 pp
->callback(pp
->callback_data
, b
, to_play
, pd
);
213 fprintf(stderr
, "pd total pre %f lpd %f\n", fixp_to_double(pd
->total
), fixp_to_double(lpd
.total
));
216 #define ignore_move(c_) do { \
217 ignores[ignores_n++] = c_; \
218 if (ignores_n > 1 && ignores[ignores_n - 1] < ignores[ignores_n - 2]) { \
219 /* Keep ignores[] sorted. We abuse the fact that we know \
220 * only one item can be out-of-order. */ \
221 coord_t cc = ignores[ignores_n - 2]; \
222 ignores[ignores_n - 2] = ignores[ignores_n - 1]; \
223 ignores[ignores_n - 1] = cc; \
225 int rowi = coord_y(c_, pd->b); \
226 lpd.browtotals_i[lpd.browtotals_n] = rowi; \
227 lpd.browtotals_v[lpd.browtotals_n++] = pd->rowtotals[rowi]; \
228 probdist_mute(pd, c_); \
230 fprintf(stderr, "ignored move %s(%f) => tot pd %f lpd %f\n", coord2sstr(c_, pd->b), fixp_to_double(pd->items[c_]), fixp_to_double(pd->total), fixp_to_double(lpd.total)); \
233 /* Make sure ko-prohibited move does not get picked. */
234 if (!is_pass(b
->ko
.coord
)) {
235 assert(b
->ko
.color
== to_play
);
236 ignore_move(b
->ko
.coord
);
239 /* Contiguity detection. */
240 if (!is_pass(b
->last_move
.coord
)) {
241 foreach_8neighbor(b
, b
->last_move
.coord
) {
242 if (c
== b
->ko
.coord
)
243 continue; // already ignored
244 if (board_at(b
, c
) != S_NONE
) {
245 assert(probdist_one(pd
, c
) == 0);
250 fixp_t val
= double_to_fixp(fixp_to_double(probdist_one(pd
, c
)) * b
->gamma
->gamma
[FEAT_CONTIGUITY
][1]);
251 lpd
.coords
[lpd
.n
] = c
;
252 lpd
.items
[lpd
.n
++] = val
;
254 } foreach_8neighbor_end
;
257 ignores
[ignores_n
] = pass
;
259 fprintf(stderr
, "pd total post %f lpd %f\n", fixp_to_double(pd
->total
), fixp_to_double(lpd
.total
));
261 /* Verify sanity, possibly. */
262 elo_check_probdist(p
, b
, to_play
, pd
, ignores
, &lpd
, b
->last_move
.coord
);
266 fixp_t stab
= fast_irandom(lpd
.total
+ pd
->total
);
268 fprintf(stderr
, "stab %f / (%f + %f)\n", fixp_to_double(stab
), fixp_to_double(lpd
.total
), fixp_to_double(pd
->total
));
269 if (stab
< lpd
.total
) {
270 /* Local probdist. */
272 /* Some debug prints. */
274 for (int i
= 0; i
< lpd
.n
; i
++) {
277 struct move m
= { .color
= to_play
, .coord
= lpd
.coords
[i
] };
278 if (board_at(b
, m
.coord
) != S_NONE
) {
279 assert(lpd
.items
[i
] == 0);
282 pattern_match(&pp
->choose
.pc
, pp
->choose
.ps
, &p
, b
, &m
);
283 char s
[256] = ""; pattern2str(s
, &p
);
284 fprintf(stderr
, "coord %s <%f> [tot %f] %s (p3:%d)\n",
285 coord2sstr(lpd
.coords
[i
], b
), fixp_to_double(lpd
.items
[i
]),
286 fixp_to_double(tot
), s
,
287 pattern3_by_spatial(pp
->choose
.pc
.spat_dict
, b
->pat3
[lpd
.coords
[i
]]));
290 for (int i
= 0; i
< lpd
.n
; i
++) {
291 if (stab
<= lpd
.items
[i
]) {
295 stab
-= lpd
.items
[i
];
298 fprintf(stderr
, "elo: local overstab [%f]\n", fixp_to_double(stab
));
302 } else if (pd
->total
> 0) {
303 /* Global probdist. */
304 /* XXX: We re-stab inside. */
305 c
= probdist_pick(pd
, ignores
);
309 fprintf(stderr
, "ding!\n");
313 /* Repair the damage. */
315 /* XXX: Do something less horribly inefficient
316 * than just recomputing the whole pd. */
318 for (int i
= 0; i
< board_size(pd
->b
); i
++)
319 pd
->rowtotals
[i
] = 0;
320 for (int i
= 0; i
< b
->flen
; i
++) {
321 pd
->items
[b
->f
[i
]] = 0;
322 board_gamma_update(b
, b
->f
[i
], to_play
);
324 assert(pd
->total
== lpd
.btotal
);
327 pd
->total
= lpd
.btotal
;
328 /* If we touched a row multiple times (and we sure will),
329 * the latter value is obsolete; but since we go through
330 * the backups in reverse order, all is good. */
331 for (int j
= lpd
.browtotals_n
- 1; j
>= 0; j
--)
332 pd
->rowtotals
[lpd
.browtotals_i
[j
]] = lpd
.browtotals_v
[j
];
340 playout_elo_choose(struct playout_policy
*p
, struct board
*b
, enum stone to_play
)
342 struct elo_policy
*pp
= p
->data
;
343 probdist_alloca(pd
, b
);
344 elo_get_probdist(p
, &pp
->choose
, b
, to_play
, &pd
);
346 pp
->callback(pp
->callback_data
, b
, to_play
, &pd
);
349 int ignores
[1] = { pass
};
350 coord_t c
= probdist_pick(&pd
, ignores
);
357 playout_elo_assess(struct playout_policy
*p
, struct prior_map
*map
, int games
)
359 struct elo_policy
*pp
= p
->data
;
360 probdist_alloca(pd
, map
->b
);
363 moves
= elo_get_probdist(p
, &pp
->assess
, map
->b
, map
->to_play
, &pd
);
365 /* It is a question how to transform the gamma to won games; we use
366 * a naive approach currently, but not sure how well it works. */
367 /* TODO: Try sqrt(p), atan(p)/pi*2. */
370 if (pp
->assess_eval
== EAV_BEST
) {
371 for (int f
= 0; f
< map
->b
->flen
; f
++) {
372 double pd_one
= fixp_to_double(probdist_one(&pd
, map
->b
->f
[f
]));
373 if (pd_one
> pd_best
)
377 double pd_total
= fixp_to_double(probdist_total(&pd
));
379 for (int f
= 0; f
< map
->b
->flen
; f
++) {
380 coord_t c
= map
->b
->f
[f
];
381 if (!map
->consider
[c
])
384 double pd_one
= fixp_to_double(probdist_one(&pd
, c
));
386 switch (pp
->assess_eval
) {
388 val
= pd_one
/ pd_total
;
391 val
= pd_one
/ pd_best
;
397 switch (pp
->assess_transform
) {
402 val
= atan(val
)/M_PI
;
405 val
= 1.0 / (1.0 + exp(-pp
->assess_sigmb
* (val
- 0.5)));
411 add_prior_value(map
, c
, val
, games
);
416 playout_elo_done(struct playout_policy
*p
)
418 struct elo_policy
*pp
= p
->data
;
419 features_gamma_done(pp
->choose
.fg
);
420 features_gamma_done(pp
->assess
.fg
);
425 playout_elo_callback(struct playout_policy
*p
, playout_elo_callbackp callback
, void *data
)
427 struct elo_policy
*pp
= p
->data
;
428 pp
->callback
= callback
;
429 pp
->callback_data
= data
;
432 struct playout_policy
*
433 playout_elo_init(char *arg
, struct board
*b
)
435 struct playout_policy
*p
= calloc2(1, sizeof(*p
));
436 struct elo_policy
*pp
= calloc2(1, sizeof(*pp
));
438 p
->choose
= playout_elo_choose
;
439 p
->assess
= playout_elo_assess
;
440 p
->done
= playout_elo_done
;
442 const char *gammafile
= features_gamma_filename
;
443 pp
->assess_sigmb
= 10.0;
444 /* Some defaults based on the table in Remi Coulom's paper. */
445 pp
->selfatari
= 0.06;
447 struct pattern_config pc
= DEFAULT_PATTERN_CONFIG
;
449 bool precise_selfatari
= false;
452 char *optspec
, *next
= arg
;
455 next
+= strcspn(next
, ":");
456 if (*next
) { *next
++ = 0; } else { *next
= 0; }
458 char *optname
= optspec
;
459 char *optval
= strchr(optspec
, '=');
460 if (optval
) *optval
++ = 0;
462 if (!strcasecmp(optname
, "selfatari") && optval
) {
463 pp
->selfatari
= atof(optval
);
464 } else if (!strcasecmp(optname
, "precisesa")) {
465 /* Use precise self-atari detection within
467 precise_selfatari
= !optval
|| atoi(optval
);
468 } else if (!strcasecmp(optname
, "gammafile") && optval
) {
469 /* patterns.gamma by default. We use this,
470 * and need also ${gammafile}f (e.g.
471 * patterns.gammaf) for fast (MC) features. */
472 gammafile
= strdup(optval
);
473 } else if (!strcasecmp(optname
, "xspat") && optval
) {
474 /* xspat==0: don't match spatial features
475 * xspat==1: match *only* spatial features */
476 xspat
= atoi(optval
);
477 } else if (!strcasecmp(optname
, "assess_fastpat")) {
478 /* Use just fast pattern set even for the
479 * node prior value assessment. */
480 pp
->assess_fastpat
= !optval
|| atoi(optval
);
481 } else if (!strcasecmp(optname
, "assess_sigmb") && optval
) {
482 pp
->assess_sigmb
= atof(optval
);
483 } else if (!strcasecmp(optname
, "assess_eval") && optval
) {
484 /* Evaluation method for prior node value
486 if (!strcasecmp(optval
, "total")) {
487 /* Proportion prob/totprob. */
488 pp
->assess_eval
= EAV_TOTAL
;
489 } else if (!strcasecmp(optval
, "best")) {
490 /* Proportion prob/bestprob. */
491 pp
->assess_eval
= EAV_BEST
;
493 fprintf(stderr
, "playout-elo: Invalid eval mode %s\n", optval
);
496 } else if (!strcasecmp(optname
, "assess_transform") && optval
) {
497 /* Transformation of evaluation for prior
498 * node value assessment. */
499 if (!strcasecmp(optval
, "linear")) {
500 /* No additional transformation. */
501 pp
->assess_transform
= EAT_LINEAR
;
502 } else if (!strcasecmp(optval
, "atan")) {
503 /* atan-shape transformation;
504 * pumps up low values. */
505 pp
->assess_transform
= EAT_ATAN
;
506 } else if (!strcasecmp(optval
, "sigmoid")) {
507 /* Sigmoid transformation
508 * according to assess_sigmb. */
509 pp
->assess_transform
= EAT_SIGMOID
;
511 fprintf(stderr
, "playout-elo: Invalid eval mode %s\n", optval
);
515 fprintf(stderr
, "playout-elo: Invalid policy argument %s or missing value\n", optname
);
521 pc
.spat_dict
= spatial_dict_init(false);
523 /* In playouts, we need to operate with much smaller set of features
524 * in order to keep reasonable speed. */
525 /* TODO: Configurable. */ /* TODO: Tune. */
526 pp
->choose
.pc
= FAST_PATTERN_CONFIG
;
527 pp
->choose
.pc
.spat_dict
= pc
.spat_dict
;
528 char cgammafile
[256]; strcpy(stpcpy(cgammafile
, gammafile
), "f");
529 pp
->choose
.fg
= features_gamma_init(&pp
->choose
.pc
, cgammafile
);
530 memcpy(pp
->choose
.ps
, PATTERN_SPEC_MATCHFAST
, sizeof(pattern_spec
));
531 for (int i
= 0; i
< FEAT_MAX
; i
++)
532 if ((xspat
== 0 && i
== FEAT_SPATIAL
) || (xspat
== 1 && i
!= FEAT_SPATIAL
))
533 pp
->choose
.ps
[i
] = 0;
534 if (precise_selfatari
) {
535 pp
->choose
.ps
[FEAT_SELFATARI
] &= ~(1<<PF_SELFATARI_STUPID
);
536 pp
->choose
.ps
[FEAT_SELFATARI
] |= (1<<PF_SELFATARI_SMART
);
538 board_gamma_set(b
, pp
->choose
.fg
, precise_selfatari
);
540 if (pp
->assess_fastpat
) {
541 pp
->assess
= pp
->choose
;
542 pp
->assess
.fg
= features_gamma_init(&pp
->assess
.pc
, cgammafile
);
544 /* More detailed set of features. */
546 pp
->assess
.fg
= features_gamma_init(&pp
->assess
.pc
, gammafile
);
547 memcpy(pp
->assess
.ps
, PATTERN_SPEC_MATCHALL
, sizeof(pattern_spec
));
548 for (int i
= 0; i
< FEAT_MAX
; i
++)
549 if ((xspat
== 0 && i
== FEAT_SPATIAL
) || (xspat
== 1 && i
!= FEAT_SPATIAL
))
550 pp
->assess
.ps
[i
] = 0;