uct/prior.c

   1 #include <assert.h>
   2 #include <math.h>
   3 #include <stdio.h>
   4 #include <stdlib.h>
   5 #include <string.h>
   6
   7 #include "board.h"
   8 #include "debug.h"
   9 #include "move.h"
  10 #include "random.h"
  11 #include "tactics.h"
  12 #include "uct/internal.h"
  13 #include "uct/prior.h"
  14 #include "uct/tree.h"
  15
/* Applying heuristic values to the tree nodes, skewing the reading in
 * the most interesting directions. */
  18
  19
  20 struct uct_prior {
  21         /* Equivalent experience for prior knowledge. MoGo paper recommends
  22          * 50 playouts per source; in practice, esp. with RAVE, about 6
  23          * playouts per source seems best. */
  24         int eqex;
  25         int even_eqex, policy_eqex, b19_eqex, eye_eqex, ko_eqex;
  26         int cfgdn; int *cfgd_eqex;
  27 };
  28
  29 void
  30 uct_prior_even(struct uct *u, struct tree_node *node, struct prior_map *map)
  31 {
  32         /* Q_{even} */
  33         /* This may be dubious for normal UCB1 but is essential for
  34          * reading stability of RAVE, it appears. */
  35         foreach_point_and_pass(map->b) {
  36                 if (!map->consider[c])
  37                         continue;
  38                 add_prior_value(map, c, 0.5, u->prior->even_eqex);
  39         } foreach_point_end;
  40 }
  41
  42 void
  43 uct_prior_eye(struct uct *u, struct tree_node *node, struct prior_map *map)
  44 {
  45         /* Discourage playing into our own eyes. However, we cannot
  46          * completely prohibit it:
  47          * #######
  48          * ...XX.#
  49          * XOOOXX#
  50          * X.OOOO#
  51          * .XXXX.# */
  52         foreach_point(map->b) {
  53                 if (!map->consider[c])
  54                         continue;
  55                 if (!board_is_one_point_eye(map->b, &c, map->to_play))
  56                         continue;
  57                 add_prior_value(map, c, 0, u->prior->eye_eqex);
  58         } foreach_point_end;
  59 }
  60
  61 void
  62 uct_prior_ko(struct uct *u, struct tree_node *node, struct prior_map *map)
  63 {
  64         /* Favor fighting ko, if we took it le 10 moves ago. */
  65         coord_t ko = map->b->last_ko.coord;
  66         if (is_pass(ko) || map->b->moves - map->b->last_ko_age > 10 || !map->consider[ko])
  67                 return;
  68         // fprintf(stderr, "prior ko-fight @ %s %s\n", stone2str(map->to_play), coord2sstr(ko, map->b));
  69         add_prior_value(map, ko, 1, u->prior->ko_eqex);
  70 }
  71
  72 void
  73 uct_prior_b19(struct uct *u, struct tree_node *node, struct prior_map *map)
  74 {
  75         /* Q_{b19} */
  76         /* Specific hints for 19x19 board - priors for certain edge distances. */
  77         foreach_point(map->b) {
  78                 if (!map->consider[c])
  79                         continue;
  80                 int d = coord_edge_distance(c, map->b);
  81                 if (d != 0 && d != 2)
  82                         continue;
  83                 /* The bonus applies only with no stones in immediate
  84                  * vincinity. */
  85                 if (board_stone_radar(map->b, c, 2))
  86                         continue;
  87                 /* First line: 0 */
  88                 /* Third line: 1 */
  89                 add_prior_value(map, c, d == 2, u->prior->b19_eqex);
  90         } foreach_point_end;
  91 }
  92
  93 void
  94 uct_prior_playout(struct uct *u, struct tree_node *node, struct prior_map *map)
  95 {
  96         /* Q_{playout-policy} */
  97         if (u->playout->assess)
  98                 u->playout->assess(u->playout, map, u->prior->policy_eqex);
  99 }
 100
 101 void
 102 uct_prior_cfgd(struct uct *u, struct tree_node *node, struct prior_map *map)
 103 {
 104         /* Q_{common_fate_graph_distance} */
 105         /* Give bonus to moves local to the last move, where "local" means
 106          * local in terms of groups, not just manhattan distance. */
 107         if (is_pass(map->b->last_move.coord) || is_resign(map->b->last_move.coord))
 108                 return;
 109
 110         foreach_point(map->b) {
 111                 if (!map->consider[c])
 112                         continue;
 113                 if (map->distances[c] > u->prior->cfgdn)
 114                         continue;
 115                 assert(map->distances[c] != 0);
 116                 int bonus = u->prior->cfgd_eqex[map->distances[c]];
 117                 add_prior_value(map, c, 1, bonus);
 118         } foreach_point_end;
 119 }
 120
 121 void
 122 uct_prior(struct uct *u, struct tree_node *node, struct prior_map *map)
 123 {
 124         if (u->prior->even_eqex)
 125                 uct_prior_even(u, node, map);
 126         if (u->prior->eye_eqex)
 127                 uct_prior_eye(u, node, map);
 128         if (u->prior->ko_eqex)
 129                 uct_prior_ko(u, node, map);
 130         if (u->prior->b19_eqex)
 131                 uct_prior_b19(u, node, map);
 132         if (u->prior->policy_eqex)
 133                 uct_prior_playout(u, node, map);
 134         if (u->prior->cfgd_eqex)
 135                 uct_prior_cfgd(u, node, map);
 136 }
 137
 138 struct uct_prior *
 139 uct_prior_init(char *arg, struct board *b)
 140 {
 141         struct uct_prior *p = calloc(1, sizeof(struct uct_prior));
 142
 143         p->ko_eqex = -2;
 144         p->even_eqex = p->policy_eqex = p->b19_eqex = p->eye_eqex = -1;
 145         p->cfgdn = -1;
 146
 147         /* Even number! */
 148         p->eqex = board_size(b)-2 >= 19 ? 20 : 14;
 149
 150         if (arg) {
 151                 char *optspec, *next = arg;
 152                 while (*next) {
 153                         optspec = next;
 154                         next += strcspn(next, ":");
 155                         if (*next) { *next++ = 0; } else { *next = 0; }
 156
 157                         char *optname = optspec;
 158                         char *optval = strchr(optspec, '=');
 159                         if (optval) *optval++ = 0;
 160
 161                         if (!strcasecmp(optname, "eqex") && optval) {
 162                                 p->eqex = atoi(optval);
 163
 164                         } else if (!strcasecmp(optname, "even") && optval) {
 165                                 p->even_eqex = atoi(optval);
 166                         } else if (!strcasecmp(optname, "policy") && optval) {
 167                                 p->policy_eqex = atoi(optval);
 168                         } else if (!strcasecmp(optname, "b19") && optval) {
 169                                 p->b19_eqex = atoi(optval);
 170                         } else if (!strcasecmp(optname, "cfgd") && optval) {
 171                                 /* cfgd=3%40%20%20 - 3 levels; immediate libs
 172                                  * of last move => 40 wins, their neighbors
 173                                  * 20 wins, 2nd-level neighbors 20 wins;
 174                                  * neighbors are group-transitive. */
 175                                 p->cfgdn = atoi(optval); optval += strcspn(optval, ":");
 176                                 p->cfgd_eqex = calloc(p->cfgdn + 1, sizeof(*p->cfgd_eqex));
 177                                 p->cfgd_eqex[0] = 0;
 178                                 for (int i = 1; *optval; i++, optval += strcspn(optval, ":")) {
 179                                         optval++;
 180                                         p->cfgd_eqex[i] = atoi(optval);
 181                                 }
 182                         } else if (!strcasecmp(optname, "eye") && optval) {
 183                                 p->eye_eqex = atoi(optval);
 184                         } else if (!strcasecmp(optname, "ko") && optval) {
 185                                 p->ko_eqex = atoi(optval);
 186                         } else {
 187                                 fprintf(stderr, "uct: Invalid prior argument %s or missing value\n", optname);
 188                                 exit(1);
 189                         }
 190                 }
 191         }
 192
 193         if (p->even_eqex < 0) p->even_eqex = p->eqex / -p->even_eqex;
 194         if (p->policy_eqex < 0) p->policy_eqex = p->eqex / -p->policy_eqex;
 195         if (p->b19_eqex < 0) p->b19_eqex = p->eqex / -p->b19_eqex;
 196         if (p->eye_eqex < 0) p->eye_eqex = p->eqex / -p->eye_eqex;
 197         if (p->ko_eqex < 0) p->ko_eqex = p->eqex / -p->ko_eqex;
 198
 199         if (p->cfgdn < 0) {
 200                 int bonuses[] = { 0, p->eqex, p->eqex / 2, p->eqex / 2 };
 201                 p->cfgdn = 3;
 202                 p->cfgd_eqex = calloc(p->cfgdn + 1, sizeof(*p->cfgd_eqex));
 203                 memcpy(p->cfgd_eqex, bonuses, sizeof(bonuses));
 204         }
 205         if (p->cfgdn > TREE_NODE_D_MAX) {
 206                 fprintf(stderr, "uct: CFG distances only up to %d available\n", TREE_NODE_D_MAX);
 207                 exit(1);
 208         }
 209
 210         return p;
 211 }
 212
 213 void
 214 uct_prior_done(struct uct_prior *p)
 215 {
 216         assert(p->cfgd_eqex);
 217         free(p->cfgd_eqex);
 218         free(p);
 219 }