uct/prior.c

   1 #include <assert.h>
   2 #include <math.h>
   3 #include <stdio.h>
   4 #include <stdlib.h>
   5 #include <string.h>
   6
   7 #include "board.h"
   8 #include "debug.h"
   9 #include "move.h"
  10 #include "random.h"
  11 #include "tactics.h"
  12 #include "uct/internal.h"
  13 #include "uct/prior.h"
  14 #include "uct/tree.h"
  15
  16 /* Applying heuristic values to the tree nodes, skewing the reading in
  17  * most interesting directions. */
  18
  19
  20 void
  21 uct_prior_one(struct uct *u, struct tree_node *node, struct prior_map *map, coord_t c)
  22 {
  23         /* Initialization of UCT values based on prior knowledge */
  24
  25         /* Q_{even} */
  26         /* This may be dubious for normal UCB1 but is essential for
  27          * reading stability of RAVE, it appears. */
  28         if (u->even_eqex) {
  29                 map->prior[c].playouts += u->even_eqex;
  30                 map->prior[c].wins += u->even_eqex / 2;
  31         }
  32
  33         /* Discourage playing into our own eyes. However, we cannot
  34          * completely prohibit it:
  35          * #######
  36          * ...XX.#
  37          * XOOOXX#
  38          * X.OOOO#
  39          * .XXXX.# */
  40         if (board_is_one_point_eye(map->b, &c, map->to_play)) {
  41                 map->prior[c].playouts += u->eqex;
  42                 map->prior[c].wins += map->parity > 0 ? 0 : u->eqex;
  43         }
  44
  45         /* Q_{b19} */
  46         /* Specific hints for 19x19 board - priors for certain edge distances. */
  47         if (u->b19_eqex) {
  48                 int d = coord_edge_distance(c, map->b);
  49                 if (d == 1 || d == 3) {
  50                         /* The bonus applies only with no stones in immediate
  51                          * vincinity. */
  52                         if (!board_stone_radar(map->b, c, 2)) {
  53                                 /* First line: -eqex */
  54                                 /* Third line: +eqex */
  55                                 int v = d == 1 ? -1 : 1;
  56                                 map->prior[c].playouts += u->b19_eqex;
  57                                 map->prior[c].wins += map->parity * v > 0 ? u->b19_eqex : 0;
  58                         }
  59                 }
  60         }
  61
  62         /* Q_{grandparent} */
  63         if (u->gp_eqex && node->parent && node->parent->parent) {
  64                 struct tree_node *gpp = node->parent->parent;
  65                 for (struct tree_node *ni = gpp->children; ni; ni = ni->sibling) {
  66                         /* Be careful not to emphasize too random results. */
  67                         if (ni->coord == node->coord && ni->u.playouts > u->gp_eqex) {
  68                                 map->prior[c].playouts += u->gp_eqex;
  69                                 map->prior[c].wins += u->gp_eqex * ni->u.wins / ni->u.playouts;
  70                         }
  71                 }
  72         }
  73
  74         /* Q_{playout-policy} */
  75         if (u->policy_eqex) {
  76                 int assess = 0;
  77                 if (u->playout->assess) {
  78                         struct move m = { c, map->to_play };
  79                         assess = u->playout->assess(u->playout, map->b, &m, u->policy_eqex);
  80                 }
  81                 if (assess) {
  82                         map->prior[c].playouts += abs(assess);
  83                         /* Good moves for enemy are losses for us.
  84                          * We will properly maximize this in the UCB1
  85                          * decision. */
  86                         assess *= map->parity;
  87                         if (assess > 0) map->prior[c].wins += assess;
  88                 }
  89         }
  90 }
  91
  92 void
  93 uct_prior(struct uct *u, struct tree_node *node, struct prior_map *map)
  94 {
  95         uct_prior_one(u, node, map, pass);
  96         foreach_point(map->b) {
  97                 if (!map->consider[c])
  98                         continue;
  99                 uct_prior_one(u, node, map, c);
 100         } foreach_point_end;
 101 }