10 #include "uct/dynkomi.h"
11 #include "uct/internal.h"
16 uct_dynkomi_generic_done(struct uct_dynkomi
*d
)
18 if (d
->data
) free(d
->data
);
23 /* NONE dynkomi strategy - never fiddle with komi values. */
26 uct_dynkomi_init_none(struct uct
*u
, char *arg
, struct board
*b
)
28 struct uct_dynkomi
*d
= calloc(1, sizeof(*d
));
32 d
->done
= uct_dynkomi_generic_done
;
36 fprintf(stderr
, "uct: Dynkomi method none accepts no arguments\n");
44 /* LINEAR dynkomi strategy - Linearly Decreasing Handicap Compensation. */
45 /* At move 0, we impose extra komi of handicap_count*handicap_value, then
46 * we linearly decrease this extra komi throughout the game down to 0
49 struct dynkomi_linear
{
56 uct_dynkomi_linear_permove(struct uct_dynkomi
*d
, struct board
*b
, struct tree
*tree
)
58 struct dynkomi_linear
*l
= d
->data
;
59 if (b
->moves
>= l
->moves
)
62 float base_komi
= board_effective_handicap(b
, l
->handicap_value
);
63 float extra_komi
= base_komi
* (l
->moves
- b
->moves
) / l
->moves
;
68 uct_dynkomi_linear_persim(struct uct_dynkomi
*d
, struct board
*b
, struct tree
*tree
, struct tree_node
*node
)
70 struct dynkomi_linear
*l
= d
->data
;
72 return tree
->extra_komi
;
73 /* We don't reuse computed value from tree->extra_komi,
74 * since we want to use value correct for this node depth.
75 * This also means the values will stay correct after
77 return uct_dynkomi_linear_permove(d
, b
, tree
);
81 uct_dynkomi_init_linear(struct uct
*u
, char *arg
, struct board
*b
)
83 struct uct_dynkomi
*d
= calloc(1, sizeof(*d
));
85 d
->permove
= uct_dynkomi_linear_permove
;
86 d
->persim
= uct_dynkomi_linear_persim
;
87 d
->done
= uct_dynkomi_generic_done
;
89 struct dynkomi_linear
*l
= calloc(1, sizeof(*l
));
92 if (board_size(b
) - 2 >= 19)
94 l
->handicap_value
= 7;
97 char *optspec
, *next
= arg
;
100 next
+= strcspn(next
, ":");
101 if (*next
) { *next
++ = 0; } else { *next
= 0; }
103 char *optname
= optspec
;
104 char *optval
= strchr(optspec
, '=');
105 if (optval
) *optval
++ = 0;
107 if (!strcasecmp(optname
, "moves") && optval
) {
108 /* Dynamic komi in handicap game; linearly
109 * decreases to basic settings until move
111 l
->moves
= atoi(optval
);
112 } else if (!strcasecmp(optname
, "handicap_value") && optval
) {
113 /* Point value of single handicap stone,
114 * for dynkomi computation. */
115 l
->handicap_value
= atoi(optval
);
116 } else if (!strcasecmp(optname
, "rootbased")) {
117 /* If set, the extra komi applied will be
118 * the same for all simulations within a move,
119 * instead of being same for all simulations
120 * within the tree node. */
121 l
->rootbased
= !optval
|| atoi(optval
);
123 fprintf(stderr
, "uct: Invalid dynkomi argument %s or missing value\n", optname
);
133 /* ADAPTIVE dynkomi strategy - Adaptive Situational Compensation */
134 /* We adapt the komi based on current situation:
135 * (i) score-based: We maintain the average score outcome of our
136 * games and adjust the komi by a fractional step towards the expected
138 * (ii) value-based: While winrate is above given threshold, adjust
139 * the komi by a fixed step in the appropriate direction. [TODO]
140 * These adjustments can be
141 * (a) Move-stepped, new extra komi value is always set only at the
142 * beginning of the tree search for next move;
143 * (b) Continuous, new extra komi value is periodically re-determined
144 * and adjusted throughout a single tree search. [TODO] */
146 struct dynkomi_adaptive
{
147 /* Do not take measured average score into regard for
148 * first @lead_moves - the variance is just too much.
149 * (Instead, we consider the handicap-based komi provided
150 * by linear dynkomi.) */
153 float (*adapter
)(struct dynkomi_adaptive
*a
, struct board
*b
);
154 float adapt_base
; // [0,1)
155 /* Sigmoid adaptation rate parameter; see below for details. */
156 float adapt_phase
; // [0,1]
157 float adapt_rate
; // [1,infty)
158 /* Linear adaptation rate parameter. */
160 float adapt_dir
; // [-1,1]
164 adapter_sigmoid(struct dynkomi_adaptive
*a
, struct board
*b
)
166 /* Figure out how much to adjust the komi based on the game
167 * stage. The adaptation rate is ~0.9 at the beginning,
168 * at game stage a->adapt_phase crosses though 0.5 and
169 * approaches 0 at the game end; the slope is controlled
170 * by a->adapt_rate. */
171 int total_moves
= b
->moves
+ board_estimated_moves_left(b
);
172 float game_portion
= (float) b
->moves
/ total_moves
;
173 float l
= -game_portion
+ a
->adapt_phase
;
174 return 1.0 / (1.0 + exp(-a
->adapt_rate
* l
));
178 adapter_linear(struct dynkomi_adaptive
*a
, struct board
*b
)
180 /* Figure out how much to adjust the komi based on the game
181 * stage. We just linearly increase/decrease the adaptation
182 * rate for first N moves. */
183 if (b
->moves
> a
->adapt_moves
)
185 if (a
->adapt_dir
< 0)
186 return 1 - (- a
->adapt_dir
) * b
->moves
/ a
->adapt_moves
;
188 return a
->adapt_dir
* b
->moves
/ a
->adapt_moves
;
192 uct_dynkomi_adaptive_permove(struct uct_dynkomi
*d
, struct board
*b
, struct tree
*tree
)
194 struct dynkomi_adaptive
*a
= d
->data
;
196 fprintf(stderr
, "m %d/%d ekomi %f permove %f/%d\n",
197 b
->moves
, a
->lead_moves
, tree
->extra_komi
,
198 tree
->score
.value
, tree
->score
.playouts
);
199 if (b
->moves
<= a
->lead_moves
)
200 return board_effective_handicap(b
, 7 /* XXX */);
201 if (tree
->score
.playouts
< 200) // XXX
202 return tree
->extra_komi
;
204 struct move_stats score
= tree
->score
;
205 /* Almost-reset tree->score to gather fresh stats. */
206 tree
->score
.playouts
= 1;
208 /* Look at average score and push extra_komi in that direction. */
209 float p
= a
->adapter(a
, b
);
210 p
= a
->adapt_base
+ p
* (1 - a
->adapt_base
);
211 if (p
> 0.9) p
= 0.9; // don't get too eager!
212 return tree
->extra_komi
+ p
* score
.value
;
216 uct_dynkomi_adaptive_persim(struct uct_dynkomi
*d
, struct board
*b
, struct tree
*tree
, struct tree_node
*node
)
218 return tree
->extra_komi
;
222 uct_dynkomi_init_adaptive(struct uct
*u
, char *arg
, struct board
*b
)
224 struct uct_dynkomi
*d
= calloc(1, sizeof(*d
));
226 d
->permove
= uct_dynkomi_adaptive_permove
;
227 d
->persim
= uct_dynkomi_adaptive_persim
;
228 d
->done
= uct_dynkomi_generic_done
;
230 struct dynkomi_adaptive
*a
= calloc(1, sizeof(*a
));
233 if (board_size(b
) - 2 >= 19)
236 a
->lead_moves
= 4; // XXX
237 a
->adapter
= adapter_sigmoid
;
239 a
->adapt_phase
= 0.5;
240 a
->adapt_moves
= 200;
245 char *optspec
, *next
= arg
;
248 next
+= strcspn(next
, ":");
249 if (*next
) { *next
++ = 0; } else { *next
= 0; }
251 char *optname
= optspec
;
252 char *optval
= strchr(optspec
, '=');
253 if (optval
) *optval
++ = 0;
255 if (!strcasecmp(optname
, "lead_moves") && optval
) {
256 /* Do not adjust komi adaptively for first
258 a
->lead_moves
= atoi(optval
);
259 } else if (!strcasecmp(optname
, "adapter") && optval
) {
260 /* Adaptatation method. */
261 if (!strcasecmp(optval
, "sigmoid")) {
262 a
->adapter
= adapter_sigmoid
;
263 } else if (!strcasecmp(optval
, "linear")) {
264 a
->adapter
= adapter_linear
;
266 fprintf(stderr
, "UCT: Invalid adapter %s\n", optval
);
269 } else if (!strcasecmp(optname
, "adapt_base") && optval
) {
270 /* Adaptation base rate; see above. */
271 a
->adapt_base
= atof(optval
);
272 } else if (!strcasecmp(optname
, "adapt_rate") && optval
) {
273 /* Adaptation slope; see above. */
274 a
->adapt_rate
= atof(optval
);
275 } else if (!strcasecmp(optname
, "adapt_phase") && optval
) {
276 /* Adaptation phase shift; see above. */
277 a
->adapt_phase
= atof(optval
);
278 } else if (!strcasecmp(optname
, "adapt_moves") && optval
) {
279 /* Adaptation move amount; see above. */
280 a
->adapt_moves
= atoi(optval
);
281 } else if (!strcasecmp(optname
, "adapt_dir") && optval
) {
282 /* Adaptation direction vector; see above. */
283 a
->adapt_dir
= atof(optval
);
285 fprintf(stderr
, "uct: Invalid dynkomi argument %s or missing value\n", optname
);