uct/prior.c

   1 #include <assert.h>
   2 #include <math.h>
   3 #include <stdio.h>
   4 #include <stdlib.h>
   5 #include <string.h>
   6
   7 #include "board.h"
   8 #include "debug.h"
   9 #include "move.h"
  10 #include "random.h"
  11 #include "uct/internal.h"
  12 #include "uct/prior.h"
  13 #include "uct/tree.h"
  14
  15 /* Applying heuristic values to the tree nodes, skewing the reading in
  16  * most interesting directions. */
  17
  18
  19 void
  20 uct_prior(struct uct *u, struct tree *tree, struct tree_node *node,
  21           struct board *b, enum stone color, int parity)
  22 {
  23         /* Initialization of UCT values based on prior knowledge */
  24
  25         /* Q_{even} */
  26         /* This may be dubious for normal UCB1 but is essential for
  27          * reading stability of RAVE, it appears. */
  28         if (u->even_eqex) {
  29                 node->prior.playouts += u->even_eqex;
  30                 node->prior.wins += u->even_eqex / 2;
  31         }
  32
  33         /* Discourage playing into our own eyes. However, we cannot
  34          * completely prohibit it:
  35          * ######
  36          * ...XX.
  37          * XOOOXX
  38          * X.OOOO
  39          * .XXXX. */
  40         if (board_is_one_point_eye(b, &node->coord, color)) {
  41                 node->prior.playouts += u->eqex;
  42                 node->prior.wins += tree_parity(tree, parity) > 0 ? 0 : u->eqex;
  43         }
  44
  45         /* Q_{grandparent} */
  46         if (u->gp_eqex && node->parent && node->parent->parent && node->parent->parent->parent) {
  47                 struct tree_node *gpp = node->parent->parent->parent;
  48                 for (struct tree_node *ni = gpp->children; ni; ni = ni->sibling) {
  49                         /* Be careful not to emphasize too random results. */
  50                         if (ni->coord == node->coord && ni->u.playouts > u->gp_eqex) {
  51                                 node->prior.playouts += u->gp_eqex;
  52                                 node->prior.wins += u->gp_eqex * ni->u.wins / ni->u.playouts;
  53                                 node->hints |= 1;
  54                         }
  55                 }
  56         }
  57
  58         /* Q_{playout-policy} */
  59         if (u->policy_eqex) {
  60                 int assess = 0;
  61                 struct playout_policy *playout = u->playout;
  62                 if (playout->assess) {
  63                         struct move m = { node->coord, color };
  64                         assess = playout->assess(playout, b, &m, u->policy_eqex);
  65                 }
  66                 if (assess) {
  67                         node->prior.playouts += abs(assess);
  68                         /* Good moves for enemy are losses for us.
  69                          * We will properly maximize this in the UCB1
  70                          * decision. */
  71                         assess *= tree_parity(tree, parity);
  72                         if (assess > 0) node->prior.wins += assess;
  73                         node->hints |= 2;
  74                 }
  75         }
  76
  77         if (node->prior.playouts) {
  78                 node->prior.value = (float) node->prior.wins / node->prior.playouts;
  79                 tree_update_node_value(node, u->amaf_prior);
  80         }
  81
  82         //fprintf(stderr, "%s,%s prior: %d/%d = %f (%f)\n", coord2sstr(node->parent->coord, b), coord2sstr(node->coord, b), node->prior.wins, node->prior.playouts, node->prior.value, assess);
  83 }