Merge branch 'master' into greedy2
[pachi.git] / uct / dynkomi.c
blob3ed4145a84c14389f3518f0dba8fa282ee9107fc
1 #define DEBUG
2 #include <assert.h>
3 #include <math.h>
4 #include <stdio.h>
5 #include <stdlib.h>
6 #include <string.h>
8 #include "board.h"
9 #include "debug.h"
10 #include "tactics/util.h"
11 #include "uct/dynkomi.h"
12 #include "uct/internal.h"
13 #include "uct/tree.h"
16 static void
17 generic_done(struct uct_dynkomi *d)
19 if (d->data) free(d->data);
20 free(d);
24 /* NONE dynkomi strategy - never fiddle with komi values. */
26 struct uct_dynkomi *
27 uct_dynkomi_init_none(struct uct *u, char *arg, struct board *b)
29 struct uct_dynkomi *d = calloc2(1, sizeof(*d));
30 d->uct = u;
31 d->permove = NULL;
32 d->persim = NULL;
33 d->done = generic_done;
34 d->data = NULL;
36 if (arg) {
37 fprintf(stderr, "uct: Dynkomi method none accepts no arguments\n");
38 exit(1);
41 return d;
45 /* LINEAR dynkomi strategy - Linearly Decreasing Handicap Compensation. */
46 /* At move 0, we impose extra komi of handicap_count*handicap_value, then
47 * we linearly decrease this extra komi throughout the game down to 0
48 * at @moves moves. Towards the end of the game the linear compensation
49 * becomes zero but we increase the extra komi when winning big. This reduces
50 * the number of point-wasting moves and makes the game more enjoyable for humans. */
52 struct dynkomi_linear {
53 int handicap_value[S_MAX];
54 int moves[S_MAX];
55 bool rootbased;
56 /* Increase the extra komi if my win ratio > green_zone but always
57 * keep extra_komi <= komi_ratchet. komi_ratchet starts infinite but decreases
58 * when we give too much extra komi and this puts us back < orange_zone.
59 * This is meant only to increase the territory margin when playing against
60 * weaker opponents. We never take negative komi when losing. The ratchet helps
61 * avoiding oscillations and keeping us above orange_zone.
62 * To disable the adaptive phase, set green_zone=2. */
63 floating_t komi_ratchet;
64 floating_t green_zone;
65 floating_t orange_zone;
66 floating_t drop_step;
69 static floating_t
70 linear_permove(struct uct_dynkomi *d, struct board *b, struct tree *tree)
72 struct dynkomi_linear *l = d->data;
73 enum stone color = d->uct->pondering ? tree->root_color : stone_other(tree->root_color);
74 int lmoves = l->moves[color];
75 if (b->moves < lmoves) {
76 floating_t base_komi = board_effective_handicap(b, l->handicap_value[color]);
77 return base_komi * (lmoves - b->moves) / lmoves;
79 /* Force a transition to extra_komi 0 before the adaptive phase. */
80 if (b->moves <= lmoves + 1) return 0;
82 /* Do not take decisions on unstable value. */
83 if (tree->root->u.playouts < GJ_MINGAMES) return tree->extra_komi;
85 floating_t my_value = tree_node_get_value(tree, 1, tree->root->u.value);
86 /* We normalize komi as in komi_by_value(), > 0 when winning. */
87 floating_t extra_komi = komi_by_color(tree->extra_komi, color);
88 assert(extra_komi >= 0);
90 if (my_value < 0.5 && l->komi_ratchet > 0 && l->komi_ratchet != INFINITY) {
91 if (DEBUGL(0))
92 fprintf(stderr, "losing %f extra %.1f ratchet %.1f -> 0\n",
93 my_value, extra_komi, l->komi_ratchet);
94 /* Disable dynkomi completely, too dangerous in this game. */
95 extra_komi = l->komi_ratchet = 0;
97 } else if (my_value < l->orange_zone && extra_komi > 0) {
98 extra_komi = l->komi_ratchet = fmax(extra_komi - l->drop_step, 0.0);
99 if (DEBUGL(3))
100 fprintf(stderr, "dropping to %f ratchet -> %.1f\n",
101 my_value, extra_komi);
103 } else if (my_value > l->green_zone && extra_komi +1 <= l->komi_ratchet) {
104 extra_komi += 1;
105 if (DEBUGL(3))
106 fprintf(stderr, "winning %f extra_komi -> %.1f, ratchet %.1f\n",
107 my_value, extra_komi, l->komi_ratchet);
109 return komi_by_color(extra_komi, color);
112 static floating_t
113 linear_persim(struct uct_dynkomi *d, struct board *b, struct tree *tree, struct tree_node *node)
115 struct dynkomi_linear *l = d->data;
116 if (l->rootbased)
117 return tree->extra_komi;
118 /* We don't reuse computed value from tree->extra_komi,
119 * since we want to use value correct for this node depth.
120 * This also means the values will stay correct after
121 * node promotion. */
122 return linear_permove(d, b, tree);
125 struct uct_dynkomi *
126 uct_dynkomi_init_linear(struct uct *u, char *arg, struct board *b)
128 struct uct_dynkomi *d = calloc2(1, sizeof(*d));
129 d->uct = u;
130 d->permove = linear_permove;
131 d->persim = linear_persim;
132 d->done = generic_done;
134 struct dynkomi_linear *l = calloc2(1, sizeof(*l));
135 d->data = l;
137 /* Force white to feel behind and try harder, but not to the
138 * point of resigning immediately in high handicap games.
139 * By move 100 white should still be behind but should have
140 * caught up enough to avoid resigning. */
141 if (board_large(b)) {
142 l->moves[S_BLACK] = 150;
143 l->moves[S_WHITE] = 100;
145 /* The real value of one stone is twice the komi so about 15 points.
146 * But use a lower value to avoid being too pessimistic as black
147 * or too optimistic as white. */
148 l->handicap_value[S_BLACK] = 8;
149 l->handicap_value[S_WHITE] = 2;
151 l->komi_ratchet = INFINITY;
152 l->green_zone = 0.85;
153 l->orange_zone = 0.8;
154 l->drop_step = 4.0;
156 if (arg) {
157 char *optspec, *next = arg;
158 while (*next) {
159 optspec = next;
160 next += strcspn(next, ":");
161 if (*next) { *next++ = 0; } else { *next = 0; }
163 char *optname = optspec;
164 char *optval = strchr(optspec, '=');
165 if (optval) *optval++ = 0;
167 if (!strcasecmp(optname, "moves") && optval) {
168 /* Dynamic komi in handicap game; linearly
169 * decreases to basic settings until move
170 * #optval. moves=blackmoves%whitemoves */
171 for (int i = S_BLACK; *optval && i <= S_WHITE; i++) {
172 l->moves[i] = atoi(optval);
173 optval += strcspn(optval, "%");
174 if (*optval) optval++;
176 } else if (!strcasecmp(optname, "handicap_value") && optval) {
177 /* Point value of single handicap stone,
178 * for dynkomi computation. */
179 for (int i = S_BLACK; *optval && i <= S_WHITE; i++) {
180 l->handicap_value[i] = atoi(optval);
181 optval += strcspn(optval, "%");
182 if (*optval) optval++;
184 } else if (!strcasecmp(optname, "rootbased")) {
185 /* If set, the extra komi applied will be
186 * the same for all simulations within a move,
187 * instead of being same for all simulations
188 * within the tree node. */
189 l->rootbased = !optval || atoi(optval);
190 } else if (!strcasecmp(optname, "green_zone") && optval) {
191 /* Increase komi when win ratio is above green_zone */
192 l->green_zone = atof(optval);
193 } else if (!strcasecmp(optname, "orange_zone") && optval) {
194 /* Decrease komi when > 0 and win ratio is below orange_zone */
195 l->orange_zone = atof(optval);
196 } else if (!strcasecmp(optname, "drop_step") && optval) {
197 /* Decrease komi by drop_step points */
198 l->drop_step = atof(optval);
199 } else {
200 fprintf(stderr, "uct: Invalid dynkomi argument %s or missing value\n", optname);
201 exit(1);
206 return d;
210 /* ADAPTIVE dynkomi strategy - Adaptive Situational Compensation */
211 /* We adapt the komi based on current situation:
212 * (i) score-based: We maintain the average score outcome of our
213 * games and adjust the komi by a fractional step towards the expected
214 * score;
215 * (ii) value-based: While winrate is above given threshold, adjust
216 * the komi by a fixed step in the appropriate direction.
217 * These adjustments can be
218 * (a) Move-stepped, new extra komi value is always set only at the
219 * beginning of the tree search for next move;
220 * (b) Continuous, new extra komi value is periodically re-determined
221 * and adjusted throughout a single tree search. */
223 struct dynkomi_adaptive {
224 /* Do not take measured average score into regard for
225 * first @lead_moves - the variance is just too much.
226 * (Instead, we consider the handicap-based komi provided
227 * by linear dynkomi.) */
228 int lead_moves;
229 /* Maximum komi to pretend the opponent to give. */
230 floating_t max_losing_komi;
231 /* Game portion at which losing komi is not allowed anymore. */
232 floating_t losing_komi_stop;
233 /* Alternative game portion determination. */
234 bool adapt_aport;
235 floating_t (*indicator)(struct uct_dynkomi *d, struct board *b, struct tree *tree, enum stone color);
237 /* Value-based adaptation. */
238 floating_t zone_red, zone_green;
239 int score_step;
240 floating_t score_step_byavg; // use portion of average score as increment
241 bool use_komi_ratchet;
242 bool losing_komi_ratchet; // ratchet even losing komi
243 int komi_ratchet_maxage;
244 // runtime, not configuration:
245 int komi_ratchet_age;
246 floating_t komi_ratchet;
248 /* Score-based adaptation. */
249 floating_t (*adapter)(struct uct_dynkomi *d, struct board *b);
250 floating_t adapt_base; // [0,1)
251 /* Sigmoid adaptation rate parameter; see below for details. */
252 floating_t adapt_phase; // [0,1]
253 floating_t adapt_rate; // [1,infty)
254 /* Linear adaptation rate parameter. */
255 int adapt_moves;
256 floating_t adapt_dir; // [-1,1]
258 #define TRUSTWORTHY_KOMI_PLAYOUTS 200
260 static floating_t
261 board_game_portion(struct dynkomi_adaptive *a, struct board *b)
263 if (!a->adapt_aport) {
264 int total_moves = b->moves + 2 * board_estimated_moves_left(b);
265 return (floating_t) b->moves / total_moves;
266 } else {
267 int brsize = board_size(b) - 2;
268 return 1.0 - (floating_t) b->flen / (brsize * brsize);
272 static floating_t
273 adapter_sigmoid(struct uct_dynkomi *d, struct board *b)
275 struct dynkomi_adaptive *a = d->data;
276 /* Figure out how much to adjust the komi based on the game
277 * stage. The adaptation rate is 0 at the beginning,
278 * at game stage a->adapt_phase crosses though 0.5 and
279 * approaches 1 at the game end; the slope is controlled
280 * by a->adapt_rate. */
281 floating_t game_portion = board_game_portion(a, b);
282 floating_t l = game_portion - a->adapt_phase;
283 return 1.0 / (1.0 + exp(-a->adapt_rate * l));
286 static floating_t
287 adapter_linear(struct uct_dynkomi *d, struct board *b)
289 struct dynkomi_adaptive *a = d->data;
290 /* Figure out how much to adjust the komi based on the game
291 * stage. We just linearly increase/decrease the adaptation
292 * rate for first N moves. */
293 if (b->moves > a->adapt_moves)
294 return 0;
295 if (a->adapt_dir < 0)
296 return 1 - (- a->adapt_dir) * b->moves / a->adapt_moves;
297 else
298 return a->adapt_dir * b->moves / a->adapt_moves;
301 static floating_t
302 komi_by_score(struct uct_dynkomi *d, struct board *b, struct tree *tree, enum stone color)
304 struct dynkomi_adaptive *a = d->data;
305 if (d->score.playouts < TRUSTWORTHY_KOMI_PLAYOUTS)
306 return tree->extra_komi;
308 struct move_stats score = d->score;
309 /* Almost-reset tree->score to gather fresh stats. */
310 d->score.playouts = 1;
312 /* Look at average score and push extra_komi in that direction. */
313 floating_t p = a->adapter(d, b);
314 p = a->adapt_base + p * (1 - a->adapt_base);
315 if (p > 0.9) p = 0.9; // don't get too eager!
316 floating_t extra_komi = tree->extra_komi + p * score.value;
317 if (DEBUGL(3))
318 fprintf(stderr, "mC += %f * %f\n", p, score.value);
319 return extra_komi;
322 static floating_t
323 komi_by_value(struct uct_dynkomi *d, struct board *b, struct tree *tree, enum stone color)
325 struct dynkomi_adaptive *a = d->data;
326 if (d->value.playouts < TRUSTWORTHY_KOMI_PLAYOUTS)
327 return tree->extra_komi;
329 struct move_stats value = d->value;
330 /* Almost-reset tree->value to gather fresh stats. */
331 d->value.playouts = 1;
332 /* Correct color POV. */
333 if (color == S_WHITE)
334 value.value = 1 - value.value;
336 /* We have three "value zones":
337 * red zone | yellow zone | green zone
338 * ~45% ~60%
339 * red zone: reduce komi
340 * yellow zone: do not touch komi
341 * green zone: enlage komi.
343 * Also, at some point komi will be tuned in such way
344 * that it will be in green zone but increasing it will
345 * be unfeasible. Thus, we have a _ratchet_ - we will
346 * remember the last komi that has put us into the
347 * red zone, and not use it or go over it. We use the
348 * ratchet only when giving extra komi, we always want
349 * to try to reduce extra komi we take.
351 * TODO: Make the ratchet expire after a while. */
353 /* We use komi_by_color() first to normalize komi
354 * additions/subtractions, then apply it again on
355 * return value to restore original komi parity. */
356 /* Positive extra_komi means that we are _giving_
357 * komi (winning), negative extra_komi is _taking_
358 * komi (losing). */
359 floating_t extra_komi = komi_by_color(tree->extra_komi, color);
360 int score_step_red = -a->score_step;
361 int score_step_green = a->score_step;
363 if (a->score_step_byavg != 0) {
364 struct move_stats score = d->score;
365 /* Almost-reset tree->score to gather fresh stats. */
366 d->score.playouts = 1;
367 /* Correct color POV. */
368 if (color == S_WHITE)
369 score.value = - score.value;
370 if (score.value > 0)
371 score_step_green = round(score.value * a->score_step_byavg);
372 else
373 score_step_red = round(-score.value * a->score_step_byavg);
374 if (score_step_green < 0 || score_step_red > 0) {
375 /* The steps are in bad direction - keep still. */
376 return komi_by_color(extra_komi, color);
380 /* Wear out the ratchet. */
381 if (a->use_komi_ratchet && a->komi_ratchet_maxage > 0) {
382 a->komi_ratchet_age += value.playouts;
383 if (a->komi_ratchet_age > a->komi_ratchet_maxage) {
384 a->komi_ratchet = 1000;
385 a->komi_ratchet_age = 0;
389 if (value.value < a->zone_red) {
390 /* Red zone. Take extra komi. */
391 if (DEBUGL(3))
392 fprintf(stderr, "[red] %f, step %d | komi ratchet %f age %d/%d -> %f\n",
393 value.value, score_step_red, a->komi_ratchet, a->komi_ratchet_age, a->komi_ratchet_maxage, extra_komi);
394 if (a->losing_komi_ratchet || extra_komi > 0) {
395 a->komi_ratchet = extra_komi;
396 a->komi_ratchet_age = 0;
398 extra_komi += score_step_red;
399 return komi_by_color(extra_komi, color);
401 } else if (value.value < a->zone_green) {
402 /* Yellow zone, do nothing. */
403 return komi_by_color(extra_komi, color);
405 } else {
406 /* Green zone. Give extra komi. */
407 if (DEBUGL(3))
408 fprintf(stderr, "[green] %f, step %d | komi ratchet %f age %d/%d\n",
409 value.value, score_step_green, a->komi_ratchet, a->komi_ratchet_age, a->komi_ratchet_maxage);
410 extra_komi += score_step_green;
411 if (a->use_komi_ratchet && extra_komi >= a->komi_ratchet)
412 extra_komi = a->komi_ratchet - 1;
413 return komi_by_color(extra_komi, color);
417 static floating_t
418 bounded_komi(struct dynkomi_adaptive *a, struct board *b,
419 enum stone color, floating_t komi, floating_t max_losing_komi)
421 /* At the end of game, disallow losing komi. */
422 if (komi_by_color(komi, color) < 0
423 && board_game_portion(a, b) > a->losing_komi_stop)
424 return 0;
426 /* Get lower bound on komi we take so that we don't underperform
427 * too much. */
428 floating_t min_komi = komi_by_color(- max_losing_komi, color);
430 if (komi_by_color(komi - min_komi, color) > 0)
431 return komi;
432 else
433 return min_komi;
436 static floating_t
437 adaptive_permove(struct uct_dynkomi *d, struct board *b, struct tree *tree)
439 /* We do not use extra komi at the game end - we are not
440 * to fool ourselves at this point. */
441 if (board_estimated_moves_left(b) <= MIN_MOVES_LEFT) {
442 tree->use_extra_komi = false;
443 return 0;
445 struct dynkomi_adaptive *a = d->data;
446 enum stone color = stone_other(tree->root_color);
447 if (DEBUGL(3))
448 fprintf(stderr, "m %d/%d ekomi %f permove %f/%d\n",
449 b->moves, a->lead_moves, tree->extra_komi,
450 d->score.value, d->score.playouts);
452 if (b->moves <= a->lead_moves)
453 return bounded_komi(a, b, color,
454 board_effective_handicap(b, 7 /* XXX */),
455 a->max_losing_komi);
457 floating_t komi = a->indicator(d, b, tree, color);
458 if (DEBUGL(3))
459 fprintf(stderr, "dynkomi: %f -> %f\n", tree->extra_komi, komi);
460 return bounded_komi(a, b, color, komi, a->max_losing_komi);
463 static floating_t
464 adaptive_persim(struct uct_dynkomi *d, struct board *b, struct tree *tree, struct tree_node *node)
466 return tree->extra_komi;
469 struct uct_dynkomi *
470 uct_dynkomi_init_adaptive(struct uct *u, char *arg, struct board *b)
472 struct uct_dynkomi *d = calloc2(1, sizeof(*d));
473 d->uct = u;
474 d->permove = adaptive_permove;
475 d->persim = adaptive_persim;
476 d->done = generic_done;
478 struct dynkomi_adaptive *a = calloc2(1, sizeof(*a));
479 d->data = a;
481 a->lead_moves = board_large(b) ? 20 : 4; // XXX
482 a->max_losing_komi = 30;
483 a->losing_komi_stop = 1.0f;
484 a->indicator = komi_by_value;
486 a->adapter = adapter_sigmoid;
487 a->adapt_rate = -18;
488 a->adapt_phase = 0.65;
489 a->adapt_moves = 200;
490 a->adapt_dir = -0.5;
492 a->zone_red = 0.45;
493 a->zone_green = 0.50;
494 a->score_step = 1;
495 a->use_komi_ratchet = true;
496 a->komi_ratchet_maxage = 0;
497 a->komi_ratchet = 1000;
499 if (arg) {
500 char *optspec, *next = arg;
501 while (*next) {
502 optspec = next;
503 next += strcspn(next, ":");
504 if (*next) { *next++ = 0; } else { *next = 0; }
506 char *optname = optspec;
507 char *optval = strchr(optspec, '=');
508 if (optval) *optval++ = 0;
510 if (!strcasecmp(optname, "lead_moves") && optval) {
511 /* Do not adjust komi adaptively for first
512 * N moves. */
513 a->lead_moves = atoi(optval);
514 } else if (!strcasecmp(optname, "max_losing_komi") && optval) {
515 a->max_losing_komi = atof(optval);
516 } else if (!strcasecmp(optname, "losing_komi_stop") && optval) {
517 a->losing_komi_stop = atof(optval);
518 } else if (!strcasecmp(optname, "indicator")) {
519 /* Adaptatation indicator - how to decide
520 * the adaptation rate and direction. */
521 if (!strcasecmp(optval, "value")) {
522 /* Winrate w/ komi so far. */
523 a->indicator = komi_by_value;
524 } else if (!strcasecmp(optval, "score")) {
525 /* Expected score w/ current komi. */
526 a->indicator = komi_by_score;
527 } else {
528 fprintf(stderr, "UCT: Invalid indicator %s\n", optval);
529 exit(1);
532 /* value indicator settings */
533 } else if (!strcasecmp(optname, "zone_red") && optval) {
534 a->zone_red = atof(optval);
535 } else if (!strcasecmp(optname, "zone_green") && optval) {
536 a->zone_green = atof(optval);
537 } else if (!strcasecmp(optname, "score_step") && optval) {
538 a->score_step = atoi(optval);
539 } else if (!strcasecmp(optname, "score_step_byavg") && optval) {
540 a->score_step_byavg = atof(optval);
541 } else if (!strcasecmp(optname, "use_komi_ratchet")) {
542 a->use_komi_ratchet = !optval || atoi(optval);
543 } else if (!strcasecmp(optname, "losing_komi_ratchet")) {
544 a->losing_komi_ratchet = !optval || atoi(optval);
545 } else if (!strcasecmp(optname, "komi_ratchet_age") && optval) {
546 a->komi_ratchet_maxage = atoi(optval);
548 /* score indicator settings */
549 } else if (!strcasecmp(optname, "adapter") && optval) {
550 /* Adaptatation method. */
551 if (!strcasecmp(optval, "sigmoid")) {
552 a->adapter = adapter_sigmoid;
553 } else if (!strcasecmp(optval, "linear")) {
554 a->adapter = adapter_linear;
555 } else {
556 fprintf(stderr, "UCT: Invalid adapter %s\n", optval);
557 exit(1);
559 } else if (!strcasecmp(optname, "adapt_base") && optval) {
560 /* Adaptation base rate; see above. */
561 a->adapt_base = atof(optval);
562 } else if (!strcasecmp(optname, "adapt_rate") && optval) {
563 /* Adaptation slope; see above. */
564 a->adapt_rate = atof(optval);
565 } else if (!strcasecmp(optname, "adapt_phase") && optval) {
566 /* Adaptation phase shift; see above. */
567 a->adapt_phase = atof(optval);
568 } else if (!strcasecmp(optname, "adapt_moves") && optval) {
569 /* Adaptation move amount; see above. */
570 a->adapt_moves = atoi(optval);
571 } else if (!strcasecmp(optname, "adapt_aport")) {
572 a->adapt_aport = !optval || atoi(optval);
573 } else if (!strcasecmp(optname, "adapt_dir") && optval) {
574 /* Adaptation direction vector; see above. */
575 a->adapt_dir = atof(optval);
577 } else {
578 fprintf(stderr, "uct: Invalid dynkomi argument %s or missing value\n", optname);
579 exit(1);
584 return d;