UCT dynkomi adapt_base: Base value for adaptation parameter
[pachi.git] / uct / dynkomi.c
blob0cb3467b1f9a308fbdf6d0daba7a38a6d0d6501e
1 #include <assert.h>
2 #include <math.h>
3 #include <stdio.h>
4 #include <stdlib.h>
5 #include <string.h>
7 #include "board.h"
8 #include "debug.h"
9 #include "tactics.h"
10 #include "uct/dynkomi.h"
11 #include "uct/internal.h"
12 #include "uct/tree.h"
15 static void
16 uct_dynkomi_generic_done(struct uct_dynkomi *d)
18 if (d->data) free(d->data);
19 free(d);
23 /* NONE dynkomi strategy - never fiddle with komi values. */
25 struct uct_dynkomi *
26 uct_dynkomi_init_none(struct uct *u, char *arg, struct board *b)
28 struct uct_dynkomi *d = calloc(1, sizeof(*d));
29 d->uct = u;
30 d->permove = NULL;
31 d->persim = NULL;
32 d->done = uct_dynkomi_generic_done;
33 d->data = NULL;
35 if (arg) {
36 fprintf(stderr, "uct: Dynkomi method none accepts no arguments\n");
37 exit(1);
40 return d;
44 /* LINEAR dynkomi strategy - Linearly Decreasing Handicap Compensation. */
45 /* At move 0, we impose extra komi of handicap_count*handicap_value, then
46 * we linearly decrease this extra komi throughout the game down to 0
47 * at @moves moves. */
49 struct dynkomi_linear {
50 int handicap_value;
51 int moves;
52 bool rootbased;
55 float
56 uct_dynkomi_linear_permove(struct uct_dynkomi *d, struct board *b, struct tree *tree)
58 struct dynkomi_linear *l = d->data;
59 if (b->moves >= l->moves)
60 return 0;
62 float base_komi = board_effective_handicap(b, l->handicap_value);
63 float extra_komi = base_komi * (l->moves - b->moves) / l->moves;
64 return extra_komi;
67 float
68 uct_dynkomi_linear_persim(struct uct_dynkomi *d, struct board *b, struct tree *tree, struct tree_node *node)
70 struct dynkomi_linear *l = d->data;
71 if (l->rootbased)
72 return tree->extra_komi;
73 /* We don't reuse computed value from tree->extra_komi,
74 * since we want to use value correct for this node depth.
75 * This also means the values will stay correct after
76 * node promotion. */
77 return uct_dynkomi_linear_permove(d, b, tree);
80 struct uct_dynkomi *
81 uct_dynkomi_init_linear(struct uct *u, char *arg, struct board *b)
83 struct uct_dynkomi *d = calloc(1, sizeof(*d));
84 d->uct = u;
85 d->permove = uct_dynkomi_linear_permove;
86 d->persim = uct_dynkomi_linear_persim;
87 d->done = uct_dynkomi_generic_done;
89 struct dynkomi_linear *l = calloc(1, sizeof(*l));
90 d->data = l;
92 if (board_size(b) - 2 >= 19)
93 l->moves = 200;
94 l->handicap_value = 7;
96 if (arg) {
97 char *optspec, *next = arg;
98 while (*next) {
99 optspec = next;
100 next += strcspn(next, ":");
101 if (*next) { *next++ = 0; } else { *next = 0; }
103 char *optname = optspec;
104 char *optval = strchr(optspec, '=');
105 if (optval) *optval++ = 0;
107 if (!strcasecmp(optname, "moves") && optval) {
108 /* Dynamic komi in handicap game; linearly
109 * decreases to basic settings until move
110 * #optval. */
111 l->moves = atoi(optval);
112 } else if (!strcasecmp(optname, "handicap_value") && optval) {
113 /* Point value of single handicap stone,
114 * for dynkomi computation. */
115 l->handicap_value = atoi(optval);
116 } else if (!strcasecmp(optname, "rootbased")) {
117 /* If set, the extra komi applied will be
118 * the same for all simulations within a move,
119 * instead of being same for all simulations
120 * within the tree node. */
121 l->rootbased = !optval || atoi(optval);
122 } else {
123 fprintf(stderr, "uct: Invalid dynkomi argument %s or missing value\n", optname);
124 exit(1);
129 return d;
133 /* ADAPTIVE dynkomi strategy - Adaptive Situational Compensation */
134 /* We adapt the komi based on current situation:
135 * (i) score-based: We maintain the average score outcome of our
136 * games and adjust the komi by a fractional step towards the expected
137 * score;
138 * (ii) value-based: While winrate is above given threshold, adjust
139 * the komi by a fixed step in the appropriate direction. [TODO]
140 * These adjustments can be
141 * (a) Move-stepped, new extra komi value is always set only at the
142 * beginning of the tree search for next move;
143 * (b) Continuous, new extra komi value is periodically re-determined
144 * and adjusted throughout a single tree search. [TODO] */
146 struct dynkomi_adaptive {
147 /* Do not take measured average score into regard for
148 * first @lead_moves - the variance is just too much.
149 * (Instead, we consider the handicap-based komi provided
150 * by linear dynkomi.) */
151 int lead_moves;
153 float (*adapter)(struct dynkomi_adaptive *a, struct board *b);
154 float adapt_base; // [0,1)
155 /* Sigmoid adaptation rate parameter; see below for details. */
156 float adapt_phase; // [0,1]
157 float adapt_rate; // [1,infty)
158 /* Linear adaptation rate parameter. */
159 int adapt_moves;
160 float adapt_dir; // [-1,1]
163 float
164 adapter_sigmoid(struct dynkomi_adaptive *a, struct board *b)
166 /* Figure out how much to adjust the komi based on the game
167 * stage. The adaptation rate is ~0.9 at the beginning,
168 * at game stage a->adapt_phase crosses though 0.5 and
169 * approaches 0 at the game end; the slope is controlled
170 * by a->adapt_rate. */
171 int total_moves = b->moves + board_estimated_moves_left(b);
172 float game_portion = (float) b->moves / total_moves;
173 float l = -game_portion + a->adapt_phase;
174 return 1.0 / (1.0 + exp(-a->adapt_rate * l));
177 float
178 adapter_linear(struct dynkomi_adaptive *a, struct board *b)
180 /* Figure out how much to adjust the komi based on the game
181 * stage. We just linearly increase/decrease the adaptation
182 * rate for first N moves. */
183 if (b->moves > a->adapt_moves)
184 return 0;
185 if (a->adapt_dir < 0)
186 return 1 - (- a->adapt_dir) * b->moves / a->adapt_moves;
187 else
188 return a->adapt_dir * b->moves / a->adapt_moves;
191 float
192 uct_dynkomi_adaptive_permove(struct uct_dynkomi *d, struct board *b, struct tree *tree)
194 struct dynkomi_adaptive *a = d->data;
195 if (DEBUGL(3))
196 fprintf(stderr, "m %d/%d ekomi %f permove %f/%d\n",
197 b->moves, a->lead_moves, tree->extra_komi,
198 tree->score.value, tree->score.playouts);
199 if (b->moves <= a->lead_moves)
200 return board_effective_handicap(b, 7 /* XXX */);
201 if (tree->score.playouts < 200) // XXX
202 return tree->extra_komi;
204 struct move_stats score = tree->score;
205 /* Almost-reset tree->score to gather fresh stats. */
206 tree->score.playouts = 1;
208 /* Look at average score and push extra_komi in that direction. */
209 float p = a->adapter(a, b);
210 p = a->adapt_base + p * (1 - a->adapt_base);
211 if (p > 0.9) p = 0.9; // don't get too eager!
212 return tree->extra_komi + p * score.value;
215 float
216 uct_dynkomi_adaptive_persim(struct uct_dynkomi *d, struct board *b, struct tree *tree, struct tree_node *node)
218 return tree->extra_komi;
221 struct uct_dynkomi *
222 uct_dynkomi_init_adaptive(struct uct *u, char *arg, struct board *b)
224 struct uct_dynkomi *d = calloc(1, sizeof(*d));
225 d->uct = u;
226 d->permove = uct_dynkomi_adaptive_permove;
227 d->persim = uct_dynkomi_adaptive_persim;
228 d->done = uct_dynkomi_generic_done;
230 struct dynkomi_adaptive *a = calloc(1, sizeof(*a));
231 d->data = a;
233 if (board_size(b) - 2 >= 19)
234 a->lead_moves = 20;
235 else
236 a->lead_moves = 4; // XXX
237 a->adapter = adapter_sigmoid;
238 a->adapt_rate = 20;
239 a->adapt_phase = 0.5;
240 a->adapt_moves = 200;
241 a->adapt_dir = -0.5;
242 a->adapt_base = 0.2;
244 if (arg) {
245 char *optspec, *next = arg;
246 while (*next) {
247 optspec = next;
248 next += strcspn(next, ":");
249 if (*next) { *next++ = 0; } else { *next = 0; }
251 char *optname = optspec;
252 char *optval = strchr(optspec, '=');
253 if (optval) *optval++ = 0;
255 if (!strcasecmp(optname, "lead_moves") && optval) {
256 /* Do not adjust komi adaptively for first
257 * N moves. */
258 a->lead_moves = atoi(optval);
259 } else if (!strcasecmp(optname, "adapter") && optval) {
260 /* Adaptatation method. */
261 if (!strcasecmp(optval, "sigmoid")) {
262 a->adapter = adapter_sigmoid;
263 } else if (!strcasecmp(optval, "linear")) {
264 a->adapter = adapter_linear;
265 } else {
266 fprintf(stderr, "UCT: Invalid adapter %s\n", optval);
267 exit(1);
269 } else if (!strcasecmp(optname, "adapt_base") && optval) {
270 /* Adaptation base rate; see above. */
271 a->adapt_base = atof(optval);
272 } else if (!strcasecmp(optname, "adapt_rate") && optval) {
273 /* Adaptation slope; see above. */
274 a->adapt_rate = atof(optval);
275 } else if (!strcasecmp(optname, "adapt_phase") && optval) {
276 /* Adaptation phase shift; see above. */
277 a->adapt_phase = atof(optval);
278 } else if (!strcasecmp(optname, "adapt_moves") && optval) {
279 /* Adaptation move amount; see above. */
280 a->adapt_moves = atoi(optval);
281 } else if (!strcasecmp(optname, "adapt_dir") && optval) {
282 /* Adaptation direction vector; see above. */
283 a->adapt_dir = atof(optval);
284 } else {
285 fprintf(stderr, "uct: Invalid dynkomi argument %s or missing value\n", optname);
286 exit(1);
291 return d;