uct/prior.c

   1 #include <assert.h>
   2 #include <math.h>
   3 #include <stdio.h>
   4 #include <stdlib.h>
   5 #include <string.h>
   6
   7 #include "board.h"
   8 #include "debug.h"
   9 #include "move.h"
  10 #include "random.h"
  11 #include "tactics.h"
  12 #include "uct/internal.h"
  13 #include "uct/prior.h"
  14 #include "uct/tree.h"
  15
  16 /* Applying heuristic values to the tree nodes, skewing the reading in
  17  * most interesting directions. */
  18
  19
  20 void
  21 uct_prior_even(struct uct *u, struct tree_node *node, struct prior_map *map)
  22 {
  23         /* Q_{even} */
  24         /* This may be dubious for normal UCB1 but is essential for
  25          * reading stability of RAVE, it appears. */
  26         foreach_point_and_pass(map->b) {
  27                 if (!map->consider[c])
  28                         continue;
  29                 map->prior[c].playouts += u->even_eqex;
  30                 map->prior[c].wins += u->even_eqex / 2;
  31         } foreach_point_end;
  32 }
  33
  34 void
  35 uct_prior_eye(struct uct *u, struct tree_node *node, struct prior_map *map)
  36 {
  37         /* Discourage playing into our own eyes. However, we cannot
  38          * completely prohibit it:
  39          * #######
  40          * ...XX.#
  41          * XOOOXX#
  42          * X.OOOO#
  43          * .XXXX.# */
  44         foreach_point_and_pass(map->b) {
  45                 if (!map->consider[c])
  46                         continue;
  47                 if (board_is_one_point_eye(map->b, &c, map->to_play)) {
  48                         map->prior[c].playouts += u->eqex;
  49                         map->prior[c].wins += map->parity > 0 ? 0 : u->eqex;
  50                 }
  51         } foreach_point_end;
  52 }
  53
  54 void
  55 uct_prior_b19(struct uct *u, struct tree_node *node, struct prior_map *map)
  56 {
  57         /* Q_{b19} */
  58         /* Specific hints for 19x19 board - priors for certain edge distances. */
  59         foreach_point_and_pass(map->b) {
  60                 if (!map->consider[c])
  61                         continue;
  62                 int d = coord_edge_distance(c, map->b);
  63                 if (d == 1 || d == 3) {
  64                         /* The bonus applies only with no stones in immediate
  65                          * vincinity. */
  66                         if (!board_stone_radar(map->b, c, 2)) {
  67                                 /* First line: -eqex */
  68                                 /* Third line: +eqex */
  69                                 int v = d == 1 ? -1 : 1;
  70                                 map->prior[c].playouts += u->b19_eqex;
  71                                 map->prior[c].wins += map->parity * v > 0 ? u->b19_eqex : 0;
  72                         }
  73                 }
  74         } foreach_point_end;
  75 }
  76
  77 void
  78 uct_prior_grandparent(struct uct *u, struct tree_node *node, struct prior_map *map)
  79 {
  80         /* Q_{grandparent} */
  81         foreach_point_and_pass(map->b) {
  82                 if (!map->consider[c])
  83                         continue;
  84                 if (!node->parent || !node->parent->parent)
  85                         continue;
  86                 struct tree_node *gpp = node->parent->parent;
  87                 for (struct tree_node *ni = gpp->children; ni; ni = ni->sibling) {
  88                         /* Be careful not to emphasize too random results. */
  89                         if (ni->coord == node->coord && ni->u.playouts > u->gp_eqex) {
  90                                 map->prior[c].playouts += u->gp_eqex;
  91                                 map->prior[c].wins += u->gp_eqex * ni->u.wins / ni->u.playouts;
  92                         }
  93                 }
  94         } foreach_point_end;
  95 }
  96
  97 void
  98 uct_prior_playout(struct uct *u, struct tree_node *node, struct prior_map *map)
  99 {
 100         /* Q_{playout-policy} */
 101         foreach_point_and_pass(map->b) {
 102                 if (!map->consider[c])
 103                         continue;
 104                 int assess = 0;
 105                 if (u->playout->assess) {
 106                         struct move m = { c, map->to_play };
 107                         assess = u->playout->assess(u->playout, map->b, &m, u->policy_eqex);
 108                 }
 109                 if (assess) {
 110                         map->prior[c].playouts += abs(assess);
 111                         /* Good moves for enemy are losses for us.
 112                          * We will properly maximize this in the UCB1
 113                          * decision. */
 114                         assess *= map->parity;
 115                         if (assess > 0) map->prior[c].wins += assess;
 116                 }
 117         } foreach_point_end;
 118 }
 119
 120 void
 121 uct_prior(struct uct *u, struct tree_node *node, struct prior_map *map)
 122 {
 123         if (u->even_eqex)
 124                 uct_prior_even(u, node, map);
 125         uct_prior_eye(u, node, map);
 126         if (u->b19_eqex)
 127                 uct_prior_b19(u, node, map);
 128         if (u->gp_eqex)
 129                 uct_prior_grandparent(u, node, map);
 130         if (u->policy_eqex)
 131                 uct_prior_playout(u, node, map);
 132 }